library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.8
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(scater)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colMeans, colnames, colSums, dirname, do.call, duplicated,
## eval, evalq, Filter, Find, get, grep, grepl, intersect,
## is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
##
## count
## Loading required package: BiocParallel
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
##
## simplify
## The following objects are masked from 'package:base':
##
## aperm, apply
##
## Attaching package: 'scater'
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following objects are masked from 'package:dplyr':
##
## arrange, filter, mutate, rename
## The following object is masked from 'package:stats':
##
## filter
library(dplyr)
library(limma)
##
## Attaching package: 'limma'
## The following object is masked from 'package:scater':
##
## plotMDS
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
library(SingleCellExperiment)
library(Seurat)
## Loading required package: cowplot
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
## The following object is masked from 'package:tidyr':
##
## expand
library(mclust)
## Package 'mclust' version 5.4.2
## Type 'citation("mclust")' for citing this R package in publications.
##
## Attaching package: 'mclust'
## The following object is masked from 'package:purrr':
##
## map
library(Matrix)
library(tibble)
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following object is masked from 'package:S4Vectors':
##
## first
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
library(edgeR)
##
## Attaching package: 'edgeR'
## The following object is masked from 'package:SingleCellExperiment':
##
## cpm
## Set the working directory; the relative file paths used below are
## resolved against it
setwd(
  "/media/data/mattb/projects/Brian_scRNAseq_website/R_projects/mouse_hypothalamus_scRNAseq"
)
## Read the saved seurat object from disk and print it
seurat_campbell_chow <- readRDS("./seurat_campbell_chow_just_created.rds")
seurat_campbell_chow
## An object of class seurat in project CAMPBELL_CHOW
## 30000 genes across 11255 samples.
## Import list of mouse mitochondrially encoded genes:
mito_genes <- read.csv("mito_genes_table.csv", header = TRUE,
                       stringsAsFactors = FALSE)
## Make a character vector of mitochondrial gene IDs.
## The IDs must be character, not factor: indexing with a factor uses its
## integer codes, which silently selects unrelated rows of the count matrix.
mito_genes <- as.character(mito_genes$id_with_url)
## Identify which mitochondrially encoded gene IDs are present in my dataset
## by matching them against the row names of the raw count matrix.
mito_genes_present <- unique(
  mito_genes[mito_genes %in% rownames(seurat_campbell_chow@raw.data)]
)
if (length(mito_genes_present) == 0) {
  stop("None of the IDs in mito_genes_table.csv (column id_with_url) match ",
       "the row names of seurat_campbell_chow@raw.data",
       call. = FALSE)
}
## NOTE(review): the percent_mito summary and the 0.004 cutoff used further
## down were obtained with the previous (factor-based) row selection and
## should be re-checked against the corrected gene set.
seurat_campbell_chow@raw.data[mito_genes_present, 1:5]
## SRR5164436_AAAAAATGCATG SRR5164436_AAAAACACGACG
## ENSMUSG00000000028 1 0
## ENSMUSG00000000037 0 0
## ENSMUSG00000000148 0 0
## ENSMUSG00000000142 0 0
## ENSMUSG00000000183 0 0
## ENSMUSG00000000149 0 0
## ENSMUSG00000000131 0 0
## ENSMUSG00000000125 0 0
## ENSMUSG00000000085 0 0
## ENSMUSG00000000171 0 0
## ENSMUSG00000000093 0 0
## ENSMUSG00000000157 0 0
## ENSMUSG00000000094 0 0
## ENSMUSG00000000127 0 0
## ENSMUSG00000000103 0 0
## ENSMUSG00000000182 0 0
## ENSMUSG00000000058 0 0
## ENSMUSG00000000154 0 0
## ENSMUSG00000000159 0 3
## ENSMUSG00000000056 0 0
## ENSMUSG00000000049 0 0
## ENSMUSG00000000168 0 0
## ENSMUSG00000000134 0 0
## ENSMUSG00000000078 0 0
## ENSMUSG00000000001 0 0
## ENSMUSG00000000194 0 0
## ENSMUSG00000000126 0 0
## ENSMUSG00000000167 0 0
## ENSMUSG00000000184 0 0
## ENSMUSG00000000088 2 0
## ENSMUSG00000000120 0 0
## ENSMUSG00000000031 0 0
## SRR5164436_AAAAAGAAAAAT SRR5164436_AAAAATGCACTA
## ENSMUSG00000000028 0 0
## ENSMUSG00000000037 0 0
## ENSMUSG00000000148 0 0
## ENSMUSG00000000142 0 0
## ENSMUSG00000000183 0 0
## ENSMUSG00000000149 0 0
## ENSMUSG00000000131 0 0
## ENSMUSG00000000125 0 0
## ENSMUSG00000000085 1 0
## ENSMUSG00000000171 0 3
## ENSMUSG00000000093 0 0
## ENSMUSG00000000157 0 0
## ENSMUSG00000000094 0 0
## ENSMUSG00000000127 0 0
## ENSMUSG00000000103 0 0
## ENSMUSG00000000182 0 0
## ENSMUSG00000000058 0 0
## ENSMUSG00000000154 0 0
## ENSMUSG00000000159 0 11
## ENSMUSG00000000056 0 0
## ENSMUSG00000000049 0 0
## ENSMUSG00000000168 0 0
## ENSMUSG00000000134 0 0
## ENSMUSG00000000078 3 0
## ENSMUSG00000000001 0 0
## ENSMUSG00000000194 0 0
## ENSMUSG00000000126 0 0
## ENSMUSG00000000167 0 0
## ENSMUSG00000000184 0 0
## ENSMUSG00000000088 0 2
## ENSMUSG00000000120 0 0
## ENSMUSG00000000031 0 0
## SRR5164436_AAAACACTTCAT
## ENSMUSG00000000028 0
## ENSMUSG00000000037 0
## ENSMUSG00000000148 0
## ENSMUSG00000000142 0
## ENSMUSG00000000183 0
## ENSMUSG00000000149 0
## ENSMUSG00000000131 2
## ENSMUSG00000000125 0
## ENSMUSG00000000085 1
## ENSMUSG00000000171 1
## ENSMUSG00000000093 0
## ENSMUSG00000000157 0
## ENSMUSG00000000094 0
## ENSMUSG00000000127 0
## ENSMUSG00000000103 0
## ENSMUSG00000000182 0
## ENSMUSG00000000058 0
## ENSMUSG00000000154 0
## ENSMUSG00000000159 14
## ENSMUSG00000000056 0
## ENSMUSG00000000049 0
## ENSMUSG00000000168 3
## ENSMUSG00000000134 0
## ENSMUSG00000000078 0
## ENSMUSG00000000001 0
## ENSMUSG00000000194 0
## ENSMUSG00000000126 0
## ENSMUSG00000000167 0
## ENSMUSG00000000184 0
## ENSMUSG00000000088 2
## ENSMUSG00000000120 0
## ENSMUSG00000000031 0
## Dimensions of the mitochondrial-gene subset of the raw counts.
## drop = FALSE keeps the subset two-dimensional even if only one gene matched.
dim(seurat_campbell_chow@raw.data[mito_genes_present, , drop = FALSE])
## [1] 32 11255
## Calculate the fraction (0-1, not 0-100) of each cell's raw counts that
## comes from the mitochondrial genes: per-cell column sum over the
## mitochondrial rows divided by the per-cell column sum over all rows.
percent_mito <- Matrix::colSums(
  seurat_campbell_chow@raw.data[mito_genes_present, , drop = FALSE]
) / Matrix::colSums(seurat_campbell_chow@raw.data)
## Basic stats of proportion of mitochondrial gene expression per cell
summary(percent_mito)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0006089 0.0011086 0.0013382 0.0018298 0.0106698
## Store the per-cell mitochondrial proportion in the object's meta data
## under the column name "percent_mito"
seurat_campbell_chow <- AddMetaData(
  seurat_campbell_chow,
  metadata = percent_mito,
  col.name = "percent_mito"
)
## Show the first rows of the seurat object meta data
head(seurat_campbell_chow@meta.data)
## nGene nUMI orig.ident replicate_name percent_mito
## SRR5164436_AAAAAATGCATG 1158 2043 SRR5164436 SRR5164436 0.0014684288
## SRR5164436_AAAAACACGACG 2171 4531 SRR5164436 SRR5164436 0.0006623979
## SRR5164436_AAAAAGAAAAAT 1286 2154 SRR5164436 SRR5164436 0.0018578727
## SRR5164436_AAAAATGCACTA 3464 8755 SRR5164436 SRR5164436 0.0018289895
## SRR5164436_AAAACACTTCAT 3626 10143 SRR5164436 SRR5164436 0.0022691397
## SRR5164436_AAAACGAACATG 3775 10500 SRR5164436 SRR5164436 0.0012388031
## QC violin plots, three panels in one row: number of genes, number of
## UMIs and mitochondrial proportion
VlnPlot(
  seurat_campbell_chow,
  features.plot = c("nGene", "nUMI", "percent_mito"),
  nCol = 3,
  x.lab.rot = TRUE,
  point.size.use = 0.2
)
## QC scatter plots, side by side: nUMI against percent_mito and nUMI
## against nGene
par(mfrow = c(1, 2))
# Hover-enabled variants kept for reference:
# GenePlot(object = seurat_campbell_chow, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_chow, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  seurat_campbell_chow,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  pch.use = 16,
  cex.use = 0.5
)
GenePlot(
  seurat_campbell_chow,
  gene1 = "nUMI",
  gene2 = "nGene",
  pch.use = 16,
  cex.use = 0.5
)
Filter out cells with more than 0.4% of total gene expression coming from mitochondrially encoded genes, or with more than 6000 genes expressed.
## Manual check of how many cells pass both upper cutoffs (percent_mito
## below 0.004 and nGene below 6000); the lower nGene cutoff is not checked
## here because I already know all cells have >800 genes
with(
  seurat_campbell_chow@meta.data,
  table(percent_mito < 0.004 & nGene < 6000)
)
##
## FALSE TRUE
## 358 10897
# FALSE TRUE
# 358 10897
## Filter cells on nGene (low threshold 800, high threshold 6000) and on
## percent_mito (no lower bound, high threshold 0.004, i.e. 0.4%)
seurat_campbell_chow <- FilterCells(
  seurat_campbell_chow,
  subset.names = c("nGene", "percent_mito"),
  low.thresholds = c(800, -Inf),
  high.thresholds = c(6000, 0.004)
)
seurat_campbell_chow
## An object of class seurat in project CAMPBELL_CHOW
## 30000 genes across 10897 samples.
# An object of class seurat in project CAMPBELL_CHOW
# 30000 genes across 10897 samples.
# 358 cells are filtered out; numbers consistent with above
## Histogram of the per-cell column sums of the @data slot before normalisation
hist(
  Matrix::colSums(seurat_campbell_chow@data),
  breaks = 100,
  main = "Total expression before normalisation",
  xlab = "Sum of expression"
)
## Normalise gene expression per cell ("LogNormalize", scale factor 10,000)
seurat_campbell_chow <- NormalizeData(
  seurat_campbell_chow,
  normalization.method = "LogNormalize",
  scale.factor = 1e4
)
## Same histogram of the @data slot after normalisation
hist(
  Matrix::colSums(seurat_campbell_chow@data),
  breaks = 100,
  main = "Total expression after normalisation",
  xlab = "Sum of expression"
)
Find genes whose expression varies between cells; these variable genes are used to construct the principal components on which the cell clustering is based.
## Find variable genes from mean expression (ExpMean) and dispersion
## (LogVMR), using 20 equal-width bins and the cutoffs given below
seurat_campbell_chow <- FindVariableGenes(
  seurat_campbell_chow,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  num.bin = 20,
  binning.method = "equal_width",
  x.low.cutoff = 0.05,
  x.high.cutoff = 4,
  y.cutoff = 0.75
)
# How many variable genes were selected
length(seurat_campbell_chow@var.genes)
## [1] 1817
seurat_campbell_chow <- FindVariableGenes(object = seurat_campbell_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width")
== 1817 variable genes
seurat_campbell_chow <- FindVariableGenes(object = seurat_campbell_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width")
== 2346 variable genes
seurat_campbell_chow <- FindVariableGenes(object = seurat_campbell_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width")
== 4084 variable genes
Scale gene expression, regressing out the effects of nUMI, percent_mito and mouse replicate (replicate_name) by building linear models.
## Scale the data while regressing out nUMI, percent_mito and the mouse
## replicate (replicate_name)
seurat_campbell_chow <- ScaleData(
  seurat_campbell_chow,
  vars.to.regress = c("nUMI", "percent_mito", "replicate_name")
)
## Regressing out: nUMI, percent_mito, replicate_name
##
## Time Elapsed: 1.362351167202 mins
## Scaling data matrix
Principal component analysis of variable genes for use in cell clustering.
## Run PCA on the variable genes and print 5 genes for each of PCs 1 to 5
seurat_campbell_chow <- RunPCA(
  seurat_campbell_chow,
  pc.genes = seurat_campbell_chow@var.genes,
  do.print = TRUE,
  pcs.print = 1:5,
  genes.print = 5
)
## [1] "PC1"
## [1] "ENSMUSG00000025468" "ENSMUSG00000043388" "ENSMUSG00000027350"
## [4] "ENSMUSG00000044349" "ENSMUSG00000024261"
## [1] ""
## [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
## [4] "ENSMUSG00000018593" "ENSMUSG00000026701"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000067786" "ENSMUSG00000055254" "ENSMUSG00000031760"
## [4] "ENSMUSG00000026701" "ENSMUSG00000021379"
## [1] ""
## [1] "ENSMUSG00000020077" "ENSMUSG00000036905" "ENSMUSG00000036896"
## [4] "ENSMUSG00000036887" "ENSMUSG00000060802"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000032854"
## [4] "ENSMUSG00000076439" "ENSMUSG00000036634"
## [1] ""
## [1] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000036905"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000036905"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715"
## [1] ""
## [1] "ENSMUSG00000079018" "ENSMUSG00000022584" "ENSMUSG00000036256"
## [4] "ENSMUSG00000056492" "ENSMUSG00000075602"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000022132" "ENSMUSG00000058897" "ENSMUSG00000033737"
## [4] "ENSMUSG00000007682" "ENSMUSG00000034810"
## [1] ""
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000038370"
## [4] "ENSMUSG00000027800" "ENSMUSG00000094800"
## [1] ""
## [1] ""
## Visualise the top genes associated with principal components 1 to 9
VizPCA(seurat_campbell_chow, pcs.use = 1:9)
## Scatter plot of principal component 1 against 2
PCAPlot(seurat_campbell_chow, dim.1 = 1, dim.2 = 2)
## Scatter plot of principal component 2 against 3
PCAPlot(seurat_campbell_chow, dim.1 = 2, dim.2 = 3)
## Heat map of gene expression for principal component 1 genes
## (cells.use = 500, balanced gene selection, column labels off)
PCHeatmap(
  seurat_campbell_chow,
  pc.use = 1,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heat map of gene expression for principal component 2 genes
## (cells.use = 500, balanced gene selection, column labels off)
PCHeatmap(
  seurat_campbell_chow,
  pc.use = 2,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heat map of gene expression for principal component 3 genes
## (cells.use = 500, balanced gene selection, column labels off)
PCHeatmap(
  seurat_campbell_chow,
  pc.use = 3,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heat map of gene expression for principal component 4 genes
## (cells.use = 500, balanced gene selection, column labels off)
PCHeatmap(
  seurat_campbell_chow,
  pc.use = 4,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heat maps of gene expression for principal components 5 to 18 in a
## single call (same settings as the single-PC heat maps)
PCHeatmap(
  seurat_campbell_chow,
  pc.use = 5:18,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Project the pre-computed PCA (built from the most variable genes
## identified earlier) onto the entire dataset (all genes), printing the
## genes per PC
seurat_campbell_chow <- ProjectPCA(seurat_campbell_chow, do.print = TRUE)
## [1] "PC1"
## [1] "ENSMUSG00000033061" "ENSMUSG00000047261" "ENSMUSG00000050711"
## [4] "ENSMUSG00000027581" "ENSMUSG00000055430" "ENSMUSG00000019986"
## [7] "ENSMUSG00000026576" "ENSMUSG00000021087" "ENSMUSG00000024268"
## [10] "ENSMUSG00000025468" "ENSMUSG00000043388" "ENSMUSG00000040785"
## [13] "ENSMUSG00000042750" "ENSMUSG00000027500" "ENSMUSG00000029223"
## [16] "ENSMUSG00000022577" "ENSMUSG00000036699" "ENSMUSG00000027350"
## [19] "ENSMUSG00000059361" "ENSMUSG00000019923" "ENSMUSG00000044349"
## [22] "ENSMUSG00000035964" "ENSMUSG00000027273" "ENSMUSG00000018965"
## [25] "ENSMUSG00000039278" "ENSMUSG00000071658" "ENSMUSG00000024261"
## [28] "ENSMUSG00000024423" "ENSMUSG00000000159" "ENSMUSG00000031840"
## [1] ""
## [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
## [4] "ENSMUSG00000018593" "ENSMUSG00000026701" "ENSMUSG00000000567"
## [7] "ENSMUSG00000050953" "ENSMUSG00000001025" "ENSMUSG00000031762"
## [10] "ENSMUSG00000035805" "ENSMUSG00000026728" "ENSMUSG00000067786"
## [13] "ENSMUSG00000095538" "ENSMUSG00000022528" "ENSMUSG00000058135"
## [16] "ENSMUSG00000005360" "ENSMUSG00000018102" "ENSMUSG00000027712"
## [19] "ENSMUSG00000029838" "ENSMUSG00000021250" "ENSMUSG00000030342"
## [22] "ENSMUSG00000032231" "ENSMUSG00000034467" "ENSMUSG00000053931"
## [25] "ENSMUSG00000008540" "ENSMUSG00000036570" "ENSMUSG00000026649"
## [28] "ENSMUSG00000017009" "ENSMUSG00000063564" "ENSMUSG00000059970"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000018451" "ENSMUSG00000021270" "ENSMUSG00000067786"
## [4] "ENSMUSG00000055254" "ENSMUSG00000026223" "ENSMUSG00000031428"
## [7] "ENSMUSG00000037926" "ENSMUSG00000052727" "ENSMUSG00000031760"
## [10] "ENSMUSG00000101111" "ENSMUSG00000037852" "ENSMUSG00000039278"
## [13] "ENSMUSG00000026701" "ENSMUSG00000031633" "ENSMUSG00000021379"
## [16] "ENSMUSG00000035805" "ENSMUSG00000034467" "ENSMUSG00000017390"
## [19] "ENSMUSG00000055430" "ENSMUSG00000025666" "ENSMUSG00000019986"
## [22] "ENSMUSG00000026649" "ENSMUSG00000095538" "ENSMUSG00000040785"
## [25] "ENSMUSG00000046432" "ENSMUSG00000079037" "ENSMUSG00000015222"
## [28] "ENSMUSG00000042750" "ENSMUSG00000000567" "ENSMUSG00000049154"
## [1] ""
## [1] "ENSMUSG00000020077" "ENSMUSG00000036905" "ENSMUSG00000036896"
## [4] "ENSMUSG00000036887" "ENSMUSG00000060802" "ENSMUSG00000079018"
## [7] "ENSMUSG00000030579" "ENSMUSG00000075602" "ENSMUSG00000028581"
## [10] "ENSMUSG00000029484" "ENSMUSG00000058715" "ENSMUSG00000026365"
## [13] "ENSMUSG00000024621" "ENSMUSG00000024397" "ENSMUSG00000036256"
## [16] "ENSMUSG00000029622" "ENSMUSG00000064373" "ENSMUSG00000021423"
## [19] "ENSMUSG00000022584" "ENSMUSG00000023992" "ENSMUSG00000015852"
## [22] "ENSMUSG00000056492" "ENSMUSG00000038642" "ENSMUSG00000001123"
## [25] "ENSMUSG00000040584" "ENSMUSG00000020154" "ENSMUSG00000016494"
## [28] "ENSMUSG00000046805" "ENSMUSG00000036353" "ENSMUSG00000041378"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000032854"
## [4] "ENSMUSG00000076439" "ENSMUSG00000036634" "ENSMUSG00000026830"
## [7] "ENSMUSG00000031775" "ENSMUSG00000006782" "ENSMUSG00000046160"
## [10] "ENSMUSG00000050121" "ENSMUSG00000032060" "ENSMUSG00000033579"
## [13] "ENSMUSG00000032517" "ENSMUSG00000022425" "ENSMUSG00000027375"
## [16] "ENSMUSG00000041607" "ENSMUSG00000020486" "ENSMUSG00000027562"
## [19] "ENSMUSG00000027858" "ENSMUSG00000040759" "ENSMUSG00000013523"
## [22] "ENSMUSG00000073680" "ENSMUSG00000022090" "ENSMUSG00000026888"
## [25] "ENSMUSG00000037166" "ENSMUSG00000027199" "ENSMUSG00000043448"
## [28] "ENSMUSG00000090996" "ENSMUSG00000011884" "ENSMUSG00000028412"
## [1] ""
## [1] "ENSMUSG00000027447" "ENSMUSG00000018451" "ENSMUSG00000036896"
## [4] "ENSMUSG00000036887" "ENSMUSG00000036905" "ENSMUSG00000030579"
## [7] "ENSMUSG00000058715" "ENSMUSG00000028581" "ENSMUSG00000024621"
## [10] "ENSMUSG00000022587" "ENSMUSG00000026576" "ENSMUSG00000024397"
## [13] "ENSMUSG00000023992" "ENSMUSG00000021423" "ENSMUSG00000015852"
## [16] "ENSMUSG00000038642" "ENSMUSG00000021268" "ENSMUSG00000008682"
## [19] "ENSMUSG00000046805" "ENSMUSG00000036353" "ENSMUSG00000050711"
## [22] "ENSMUSG00000033061" "ENSMUSG00000048163" "ENSMUSG00000060802"
## [25] "ENSMUSG00000040747" "ENSMUSG00000059498" "ENSMUSG00000030786"
## [28] "ENSMUSG00000050621" "ENSMUSG00000021665" "ENSMUSG00000036438"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000036905"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715" "ENSMUSG00000024621"
## [7] "ENSMUSG00000021423" "ENSMUSG00000028581" "ENSMUSG00000024397"
## [10] "ENSMUSG00000038642" "ENSMUSG00000023992" "ENSMUSG00000015852"
## [13] "ENSMUSG00000036353" "ENSMUSG00000046805" "ENSMUSG00000048163"
## [16] "ENSMUSG00000030786" "ENSMUSG00000059498" "ENSMUSG00000040747"
## [19] "ENSMUSG00000000682" "ENSMUSG00000021665" "ENSMUSG00000052160"
## [22] "ENSMUSG00000040229" "ENSMUSG00000036908" "ENSMUSG00000018008"
## [25] "ENSMUSG00000030844" "ENSMUSG00000069516" "ENSMUSG00000044811"
## [28] "ENSMUSG00000020377" "ENSMUSG00000089929" "ENSMUSG00000052336"
## [1] ""
## [1] "ENSMUSG00000079018" "ENSMUSG00000022584" "ENSMUSG00000036256"
## [4] "ENSMUSG00000056492" "ENSMUSG00000075602" "ENSMUSG00000040584"
## [7] "ENSMUSG00000020154" "ENSMUSG00000041378" "ENSMUSG00000030237"
## [10] "ENSMUSG00000030235" "ENSMUSG00000061353" "ENSMUSG00000029648"
## [13] "ENSMUSG00000039167" "ENSMUSG00000001946" "ENSMUSG00000033960"
## [16] "ENSMUSG00000031239" "ENSMUSG00000039349" "ENSMUSG00000024140"
## [19] "ENSMUSG00000006386" "ENSMUSG00000020717" "ENSMUSG00000114487"
## [22] "ENSMUSG00000042116" "ENSMUSG00000031871" "ENSMUSG00000027435"
## [25] "ENSMUSG00000019966" "ENSMUSG00000034738" "ENSMUSG00000062960"
## [28] "ENSMUSG00000040732" "ENSMUSG00000039831" "ENSMUSG00000045954"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000022132" "ENSMUSG00000058897" "ENSMUSG00000033737"
## [4] "ENSMUSG00000007682" "ENSMUSG00000034810" "ENSMUSG00000029838"
## [7] "ENSMUSG00000022528" "ENSMUSG00000055653" "ENSMUSG00000063564"
## [10] "ENSMUSG00000045005" "ENSMUSG00000030629" "ENSMUSG00000052387"
## [13] "ENSMUSG00000022816" "ENSMUSG00000026185" "ENSMUSG00000047786"
## [16] "ENSMUSG00000021250" "ENSMUSG00000017493" "ENSMUSG00000017390"
## [19] "ENSMUSG00000024518" "ENSMUSG00000064351" "ENSMUSG00000022419"
## [22] "ENSMUSG00000034640" "ENSMUSG00000008540" "ENSMUSG00000030111"
## [25] "ENSMUSG00000064370" "ENSMUSG00000093460" "ENSMUSG00000038418"
## [28] "ENSMUSG00000061718" "ENSMUSG00000027239" "ENSMUSG00000064215"
## [1] ""
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000038370"
## [4] "ENSMUSG00000027800" "ENSMUSG00000094800" "ENSMUSG00000044772"
## [7] "ENSMUSG00000027744" "ENSMUSG00000072674" "ENSMUSG00000032595"
## [10] "ENSMUSG00000110332" "ENSMUSG00000045655" "ENSMUSG00000041323"
## [13] "ENSMUSG00000047139" "ENSMUSG00000095304" "ENSMUSG00000108841"
## [16] "ENSMUSG00000022037" "ENSMUSG00000047394" "ENSMUSG00000029182"
## [19] "ENSMUSG00000072473" "ENSMUSG00000046242" "ENSMUSG00000047671"
## [22] "ENSMUSG00000044475" "ENSMUSG00000020473" "ENSMUSG00000027360"
## [25] "ENSMUSG00000026683" "ENSMUSG00000028441" "ENSMUSG00000038135"
## [28] "ENSMUSG00000026301" "ENSMUSG00000091345" "ENSMUSG00000027867"
## [1] ""
## [1] ""
## Run the JackStraw test (100 replicates, progress bar shown) to
## investigate which PCs are statistically significant
seurat_campbell_chow <- JackStraw(
  seurat_campbell_chow,
  num.replicate = 100,
  display.progress = TRUE
)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 20%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|==================== | 31%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 40%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
## Time Elapsed: 7.0821281393369 mins
# The maximum number of PCs allowed is 20, so all of them (1 to 20) are plotted.
## Draw the JackStraw plots
JackStrawPlot(seurat_campbell_chow, PCs = 1:20)
## Warning: Removed 25440 rows containing missing values (geom_point).
## An object of class seurat in project CAMPBELL_CHOW
## 30000 genes across 10897 samples.
## An elbow plot is a less computationally intensive heuristic for choosing
## the statistically significant PCs
PCElbowPlot(seurat_campbell_chow)
There is little difference in the JackStraw and elbow plots when using 1817, 2346 or 4084 variable genes.
## Cluster cells on principal components 1 to 20 of the PCA reduction at
## resolution 0.6; the SNN is saved and recalculation is forced
seurat_campbell_chow <- FindClusters(
  seurat_campbell_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## Print the parameters of the latest FindClusters run
PrintFindClustersParams(seurat_campbell_chow)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 17:35:29
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
seurat_campbell_chow <- RunTSNE(object = seurat_campbell_chow, dims.use = 1:20, do.fast = TRUE)
# TSNEPlot(object = seurat_campbell_chow)
TSNEPlot(object = seurat_campbell_chow,
no.legend = TRUE,
do.label = TRUE)
## 4084 variable genes = 19 clusters
## 2346 variable genes = 20 clusters
## 1817 variable genes = 20 clusters
table(seurat_campbell_chow@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 2262 1772 1405 1338 653 536 527 343 324 304 250 209 207 188 165
## 15 16 17 18 19
## 136 110 66 58 44
proportion_table<- table(seurat_campbell_chow@ident, seurat_campbell_chow@meta.data$replicate_name)
proportion_table<- round(prop.table(proportion_table, 2), 5)
proportion_table
##
## SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
## 0 0.10912 0.37939 0.07658 0.42257 0.31911
## 1 0.16581 0.12646 0.23146 0.11024 0.08535
## 2 0.11880 0.14857 0.12349 0.17585 0.10281
## 3 0.17037 0.04129 0.18938 0.03150 0.05044
## 4 0.07123 0.03609 0.07347 0.03675 0.06305
## 5 0.04501 0.05299 0.03174 0.01837 0.11251
## 6 0.04416 0.04681 0.05864 0.03150 0.04462
## 7 0.04074 0.02341 0.03794 0.00787 0.01455
## 8 0.02764 0.03023 0.02898 0.03937 0.03395
## 9 0.04359 0.01528 0.03277 0.01050 0.00485
## 10 0.02707 0.02438 0.02346 0.00787 0.00873
## 11 0.03020 0.00748 0.02518 0.00262 0.00582
## 12 0.02194 0.01983 0.01035 0.02625 0.02813
## 13 0.02165 0.01073 0.01587 0.01575 0.02619
## 14 0.01880 0.01040 0.01069 0.02100 0.02716
## 15 0.01396 0.01300 0.01414 0.01575 0.00000
## 16 0.00769 0.00293 0.00000 0.02362 0.06305
## 17 0.00969 0.00455 0.00414 0.00262 0.00485
## 18 0.00627 0.00488 0.00586 0.00000 0.00388
## 19 0.00627 0.00130 0.00586 0.00000 0.00097
TSNEPlot(object = seurat_campbell_chow,
group.by = "replicate_name",
no.legend = FALSE,
do.label = FALSE,
pt.size = 0.4
)
## Pairwise correlation between replicates of their per-cluster cell proportions.
## as.data.frame.matrix() keeps the clusters-by-replicates layout of the table
## together with its row and column names, so the number of replicates is not
## hard-coded.
proportion_table <- as.data.frame.matrix(proportion_table)
## Use every cluster (row). Indexing with `[1:5, ]` kept only the first five
## clusters, so each replicate-vs-replicate correlation was based on five points.
chart.Correlation(proportion_table)
#######
seurat_campbell_chow <- FindClusters(object = seurat_campbell_chow, reduction.type = "pca", dims.use = 1:20,
resolution = 1.0, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)
PrintFindClustersParams(object = seurat_campbell_chow)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 17:35:29
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
seurat_campbell_chow <- RunTSNE(object = seurat_campbell_chow, dims.use = 1:20, do.fast = TRUE)
# TSNEPlot(object = seurat_campbell_chow)
TSNEPlot(object = seurat_campbell_chow,
no.legend = TRUE,
do.label = TRUE)
## 4084 variable genes = 21 clusters
## 2346 variable genes = 22 clusters
table(seurat_campbell_chow@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 1851 1685 1406 1255 660 631 542 378 343 326 304 285 250 209 188
## 15 16 17 18 19 20
## 165 136 110 70 58 45
proportion_table<- table(seurat_campbell_chow@ident, seurat_campbell_chow@meta.data$replicate_name)
proportion_table<- round(prop.table(proportion_table, 2), 5)
proportion_table
##
## SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
## 0 0.08832 0.31209 0.05795 0.32808 0.27934
## 1 0.15954 0.11834 0.22146 0.09449 0.08050
## 2 0.11880 0.14857 0.12384 0.17585 0.10281
## 3 0.15698 0.04454 0.17040 0.03937 0.05626
## 4 0.07322 0.03576 0.07382 0.03675 0.06305
## 5 0.04872 0.06860 0.06175 0.03937 0.05335
## 6 0.04501 0.05299 0.03380 0.02100 0.11154
## 7 0.02991 0.04194 0.03036 0.07612 0.02619
## 8 0.04074 0.02341 0.03794 0.00787 0.01455
## 9 0.02735 0.03056 0.02967 0.03937 0.03395
## 10 0.04359 0.01528 0.03277 0.01050 0.00485
## 11 0.02621 0.02828 0.01932 0.04199 0.03298
## 12 0.02707 0.02438 0.02346 0.00787 0.00873
## 13 0.03020 0.00748 0.02518 0.00262 0.00582
## 14 0.02165 0.01073 0.01587 0.01575 0.02619
## 15 0.01880 0.01040 0.01069 0.02100 0.02716
## 16 0.01396 0.01300 0.01414 0.01575 0.00000
## 17 0.00769 0.00293 0.00000 0.02362 0.06305
## 18 0.00969 0.00455 0.00552 0.00262 0.00485
## 19 0.00627 0.00488 0.00586 0.00000 0.00388
## 20 0.00627 0.00130 0.00621 0.00000 0.00097
TSNEPlot(object = seurat_campbell_chow,
group.by = "replicate_name",
no.legend = FALSE,
do.label = FALSE,
pt.size = 0.4
)
## Pairwise correlation between replicates of their per-cluster cell proportions.
## as.data.frame.matrix() keeps the clusters-by-replicates layout of the table
## together with its row and column names, so the number of replicates is not
## hard-coded.
proportion_table <- as.data.frame.matrix(proportion_table)
## Use every cluster (row). Indexing with `[1:5, ]` kept only the first five
## clusters, so each replicate-vs-replicate correlation was based on five points.
chart.Correlation(proportion_table)
#######
seurat_campbell_chow <- FindClusters(object = seurat_campbell_chow, reduction.type = "pca", dims.use = 1:20,
resolution = 1.5, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)
PrintFindClustersParams(object = seurat_campbell_chow)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 17:35:29
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
seurat_campbell_chow <- RunTSNE(object = seurat_campbell_chow, dims.use = 1:20, do.fast = TRUE)
# TSNEPlot(object = seurat_campbell_chow)
TSNEPlot(object = seurat_campbell_chow,
no.legend = TRUE,
do.label = TRUE)
## 4084 variable genes = 25 clusters
## 2346 variable genes = 24 clusters
table(seurat_campbell_chow@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 1603 1257 1061 764 712 671 651 641 581 546 343 320 304 251 209
## 15 16 17 18 19 20 21 22
## 207 188 165 136 111 74 58 44
proportion_table<- table(seurat_campbell_chow@ident, seurat_campbell_chow@meta.data$replicate_name)
proportion_table<- round(prop.table(proportion_table, 2), 5)
proportion_table
##
## SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
## 0 0.07179 0.27926 0.04277 0.29134 0.24927
## 1 0.15641 0.04616 0.17075 0.03675 0.05529
## 2 0.10969 0.06469 0.14108 0.04987 0.04753
## 3 0.04046 0.11541 0.03587 0.16010 0.09893
## 4 0.05584 0.06177 0.09107 0.06037 0.03783
## 5 0.04957 0.08355 0.05002 0.10761 0.05238
## 6 0.07151 0.03609 0.07244 0.03675 0.06305
## 7 0.07806 0.03316 0.08762 0.01575 0.00485
## 8 0.04615 0.06047 0.03242 0.03675 0.12124
## 9 0.04501 0.05104 0.05933 0.03150 0.04559
## 10 0.04074 0.02341 0.03794 0.00787 0.01455
## 11 0.02735 0.02958 0.02863 0.03937 0.03395
## 12 0.04359 0.01528 0.03277 0.01050 0.00485
## 13 0.02735 0.02438 0.02346 0.00787 0.00873
## 14 0.03020 0.00748 0.02518 0.00262 0.00582
## 15 0.02194 0.01983 0.01035 0.02625 0.02813
## 16 0.02165 0.01073 0.01587 0.01575 0.02619
## 17 0.01880 0.01040 0.01069 0.02100 0.02716
## 18 0.01396 0.01300 0.01414 0.01575 0.00000
## 19 0.00769 0.00293 0.00000 0.02362 0.06402
## 20 0.00969 0.00520 0.00586 0.00262 0.00582
## 21 0.00627 0.00488 0.00586 0.00000 0.00388
## 22 0.00627 0.00130 0.00586 0.00000 0.00097
TSNEPlot(object = seurat_campbell_chow,
group.by = "replicate_name",
no.legend = FALSE,
do.label = FALSE,
pt.size = 0.4
)
## Pairwise correlation between replicates of their per-cluster cell proportions.
## as.data.frame.matrix() keeps the clusters-by-replicates layout of the table
## together with its row and column names, so the number of replicates is not
## hard-coded.
proportion_table <- as.data.frame.matrix(proportion_table)
## Use every cluster (row). Indexing with `[1:5, ]` kept only the first five
## clusters, so each replicate-vs-replicate correlation was based on five points.
chart.Correlation(proportion_table)
#######
seurat_campbell_chow <- FindClusters(object = seurat_campbell_chow, reduction.type = "pca", dims.use = 1:20,
resolution = 2.0, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)
PrintFindClustersParams(object = seurat_campbell_chow)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 17:35:29
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
seurat_campbell_chow <- RunTSNE(object = seurat_campbell_chow, dims.use = 1:20, do.fast = TRUE)
# TSNEPlot(object = seurat_campbell_chow)
TSNEPlot(object = seurat_campbell_chow,
no.legend = TRUE,
do.label = TRUE)
## 4084 variable genes = 29 clusters
## 2346 variable genes = 28 clusters
table(seurat_campbell_chow@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 959 745 736 674 658 655 618 614 585 572 476 474 436 343 320 304 289 251
## 18 19 20 21 22 23 24 25 26 27
## 209 207 188 136 110 109 70 58 56 45
proportion_table<- table(seurat_campbell_chow@ident, seurat_campbell_chow@meta.data$replicate_name)
proportion_table<- round(prop.table(proportion_table, 2), 5)
proportion_table
##
## SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
## 0 0.03419 0.18108 0.02139 0.19423 0.14161
## 1 0.03875 0.11248 0.03484 0.16010 0.09796
## 2 0.08917 0.04486 0.07796 0.03675 0.04365
## 3 0.05185 0.06014 0.08486 0.06037 0.03686
## 4 0.07977 0.03544 0.08831 0.01575 0.00679
## 5 0.06496 0.05104 0.07278 0.04199 0.04171
## 6 0.06781 0.03316 0.06968 0.03150 0.06208
## 7 0.07977 0.00065 0.11142 0.00000 0.00873
## 8 0.03618 0.08778 0.03139 0.11286 0.05238
## 9 0.04587 0.05917 0.03174 0.03412 0.12027
## 10 0.05242 0.01821 0.07692 0.01312 0.00776
## 11 0.02792 0.07217 0.01690 0.08136 0.07177
## 12 0.03162 0.04649 0.03794 0.03412 0.05723
## 13 0.04074 0.02341 0.03794 0.00787 0.01455
## 14 0.02735 0.02958 0.02863 0.03937 0.03395
## 15 0.04359 0.01528 0.03277 0.01050 0.00485
## 16 0.02422 0.02958 0.02725 0.01050 0.02910
## 17 0.02735 0.02438 0.02346 0.00787 0.00873
## 18 0.03020 0.00748 0.02518 0.00262 0.00582
## 19 0.02194 0.01983 0.01035 0.02625 0.02813
## 20 0.02165 0.01073 0.01587 0.01575 0.02619
## 21 0.01396 0.01300 0.01414 0.01575 0.00000
## 22 0.00769 0.00293 0.00000 0.02362 0.06305
## 23 0.01026 0.00943 0.00586 0.01312 0.02134
## 24 0.00969 0.00455 0.00552 0.00262 0.00485
## 25 0.00627 0.00488 0.00586 0.00000 0.00388
## 26 0.00855 0.00098 0.00483 0.00787 0.00582
## 27 0.00627 0.00130 0.00621 0.00000 0.00097
TSNEPlot(object = seurat_campbell_chow,
group.by = "replicate_name",
no.legend = FALSE,
do.label = FALSE,
pt.size = 0.4 )
# install.packages("PerformanceAnalytics")
## Pairwise correlation between replicates of their per-cluster cell proportions.
## as.data.frame.matrix() keeps the clusters-by-replicates layout of the table
## together with its row and column names, so the number of replicates is not
## hard-coded.
proportion_table <- as.data.frame.matrix(proportion_table)
## Use every cluster (row). Indexing with `[1:5, ]` kept only the first five
## clusters, so each replicate-vs-replicate correlation was based on five points.
chart.Correlation(proportion_table)
# cor(proportion_table[1],proportion_table[4])
## Cluster cells using final parameters (1817 genes, 20 PC, resolution = 0.6)
seurat_campbell_chow <- FindClusters(object = seurat_campbell_chow, reduction.type = "pca", dims.use = 1:20,
resolution = 0.6, print.output = 0, save.SNN = TRUE, force.recalc = TRUE)
## Produce t-SNE
seurat_campbell_chow <- RunTSNE(object = seurat_campbell_chow, dims.use = 1:20, do.fast = TRUE)
Use 1817 genes, 20 principal components and a resolution of 0.6 to give 20 individual clusters. Fewer clusters are more likely to give meaningful, functionally distinct groups of cells.
## Differential expression of every cluster against all remaining cells,
## reporting both positive and negative markers (only.pos = FALSE).
seurat_campbell_chow_biomarkers <- FindAllMarkers(
  object = seurat_campbell_chow,
  min.pct = 0.2,
  only.pos = FALSE
)
## Within each cluster, keep the 10 markers with the largest avg_logFC
top10_seurat_campbell_markers <- top_n(
  group_by(seurat_campbell_chow_biomarkers, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers
## # A tibble: 200 x 7
## # Groups: cluster [20]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0. 0.823 0.94 0.53 0. 0 ENSMUSG00000047261
## 2 0. 0.772 0.859 0.445 0. 0 ENSMUSG00000027500
## 3 0. 0.716 0.982 0.701 0. 0 ENSMUSG00000055430
## 4 9.02e-250 0.740 0.822 0.445 2.71e-245 0 ENSMUSG00000044349
## 5 1.18e-248 0.957 0.757 0.432 3.54e-244 0 ENSMUSG00000060188
## 6 9.45e-220 0.723 0.935 0.717 2.83e-215 0 ENSMUSG00000021268
## 7 2.78e-202 0.689 0.734 0.407 8.35e-198 0 ENSMUSG00000000159
## 8 4.90e-159 0.645 0.561 0.269 1.47e-154 0 ENSMUSG00000066392
## 9 6.27e-139 1.46 0.382 0.156 1.88e-134 0 ENSMUSG00000021647
## 10 1.95e- 79 1.92 0.271 0.122 5.84e- 75 0 ENSMUSG00000020660
## # ... with 190 more rows
# write.csv(as.data.frame(seurat_campbell_chow_biomarkers), file = "seurat_campbell_chow_biomarkers.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers), file = "top10_seurat_campbell_chow_biomarkers.csv", quote = FALSE)
## Repeat the marker search with the ROC test. This can take a long time.
seurat_campbell_chow_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_chow,
  test.use = "roc",
  min.pct = 0.2,
  only.pos = FALSE
)
## Within each cluster, keep the 10 markers with the largest avg_logFC
top10_seurat_campbell_markers_ROC <- top_n(
  group_by(seurat_campbell_chow_biomarkers_ROC, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers_ROC
## # A tibble: 200 x 9
## # Groups: cluster [20]
## myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0.783 0.823 0.566 0.823 0.94 0.53 NA 0 ENSMUSG00…
## 2 0.78 0.716 0.56 0.716 0.982 0.701 NA 0 ENSMUSG00…
## 3 0.755 0.614 0.51 0.614 0.987 0.697 NA 0 ENSMUSG00…
## 4 0.754 0.772 0.508 0.772 0.859 0.445 NA 0 ENSMUSG00…
## 5 0.717 0.740 0.434 0.740 0.822 0.445 NA 0 ENSMUSG00…
## 6 0.715 0.957 0.430 0.957 0.757 0.432 NA 0 ENSMUSG00…
## 7 0.714 0.723 0.428 0.723 0.935 0.717 NA 0 ENSMUSG00…
## 8 0.711 0.634 0.422 0.634 0.803 0.419 NA 0 ENSMUSG00…
## 9 0.705 0.644 0.410 0.644 0.763 0.403 NA 0 ENSMUSG00…
## 10 0.701 0.623 0.402 0.623 0.771 0.399 NA 0 ENSMUSG00…
## # ... with 190 more rows
# write.csv(as.data.frame(seurat_campbell_chow_biomarkers_ROC), file = "seurat_campbell_chow_biomarkers_ROC.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers_ROC), file = "top10_seurat_campbell_chow_biomarkers_ROC.csv", quote = FALSE)
## Heatmap of the top 10 differentially expressed genes of each cluster
DoHeatmap(
  object = seurat_campbell_chow,
  genes.use = top10_seurat_campbell_markers$gene,
  remove.key = TRUE,
  slim.col.label = TRUE
)
## save seurat object as .rds
#saveRDS(seurat_campbell_chow, file = "./seurat_campbell_chow_final.rds")
## Load seurat object
seurat_campbell_batch_edgeR_chow<- readRDS(file = "./seurat_campbell_chow_just_created.rds")
seurat_campbell_batch_edgeR_chow
## An object of class seurat in project CAMPBELL_CHOW
## 30000 genes across 11255 samples.
# An object of class seurat in project CAMPBELL_CHOW
# 30000 genes across 11255 samples.
## Import the table of mouse mitochondrially encoded genes.
## stringsAsFactors = FALSE keeps the IDs as character strings: a factor used as
## a row index is treated as its integer level codes, which silently selects
## arbitrary rows of the count matrix instead of the named genes.
mito_genes <- read.csv("mito_genes_table.csv", header = TRUE, stringsAsFactors = FALSE)
## Vector of unique mitochondrial gene IDs.
## NOTE(review): the column is called id_with_url; this assumes it holds plain
## Ensembl gene IDs matching the matrix rownames - confirm against the CSV.
mito_genes <- unique(as.character(mito_genes$id_with_url))
## Keep only the mitochondrial gene IDs that are rows of the raw count matrix
mito_genes_present <- mito_genes[mito_genes %in% rownames(seurat_campbell_batch_edgeR_chow@raw.data)]
## Fail loudly rather than computing a mitochondrial fraction from zero genes
if (length(mito_genes_present) == 0) {
  stop("None of the mitochondrial gene IDs were found in the raw data rownames", call. = FALSE)
}
## Preview the mitochondrial counts for the first five cells
seurat_campbell_batch_edgeR_chow@raw.data[mito_genes_present, 1:5]
## SRR5164436_AAAAAATGCATG SRR5164436_AAAAACACGACG
## ENSMUSG00000000028 1 0
## ENSMUSG00000000037 0 0
## ENSMUSG00000000148 0 0
## ENSMUSG00000000142 0 0
## ENSMUSG00000000183 0 0
## ENSMUSG00000000149 0 0
## ENSMUSG00000000131 0 0
## ENSMUSG00000000125 0 0
## ENSMUSG00000000085 0 0
## ENSMUSG00000000171 0 0
## ENSMUSG00000000093 0 0
## ENSMUSG00000000157 0 0
## ENSMUSG00000000094 0 0
## ENSMUSG00000000127 0 0
## ENSMUSG00000000103 0 0
## ENSMUSG00000000182 0 0
## ENSMUSG00000000058 0 0
## ENSMUSG00000000154 0 0
## ENSMUSG00000000159 0 3
## ENSMUSG00000000056 0 0
## ENSMUSG00000000049 0 0
## ENSMUSG00000000168 0 0
## ENSMUSG00000000134 0 0
## ENSMUSG00000000078 0 0
## ENSMUSG00000000001 0 0
## ENSMUSG00000000194 0 0
## ENSMUSG00000000126 0 0
## ENSMUSG00000000167 0 0
## ENSMUSG00000000184 0 0
## ENSMUSG00000000088 2 0
## ENSMUSG00000000120 0 0
## ENSMUSG00000000031 0 0
## SRR5164436_AAAAAGAAAAAT SRR5164436_AAAAATGCACTA
## ENSMUSG00000000028 0 0
## ENSMUSG00000000037 0 0
## ENSMUSG00000000148 0 0
## ENSMUSG00000000142 0 0
## ENSMUSG00000000183 0 0
## ENSMUSG00000000149 0 0
## ENSMUSG00000000131 0 0
## ENSMUSG00000000125 0 0
## ENSMUSG00000000085 1 0
## ENSMUSG00000000171 0 3
## ENSMUSG00000000093 0 0
## ENSMUSG00000000157 0 0
## ENSMUSG00000000094 0 0
## ENSMUSG00000000127 0 0
## ENSMUSG00000000103 0 0
## ENSMUSG00000000182 0 0
## ENSMUSG00000000058 0 0
## ENSMUSG00000000154 0 0
## ENSMUSG00000000159 0 11
## ENSMUSG00000000056 0 0
## ENSMUSG00000000049 0 0
## ENSMUSG00000000168 0 0
## ENSMUSG00000000134 0 0
## ENSMUSG00000000078 3 0
## ENSMUSG00000000001 0 0
## ENSMUSG00000000194 0 0
## ENSMUSG00000000126 0 0
## ENSMUSG00000000167 0 0
## ENSMUSG00000000184 0 0
## ENSMUSG00000000088 0 2
## ENSMUSG00000000120 0 0
## ENSMUSG00000000031 0 0
## SRR5164436_AAAACACTTCAT
## ENSMUSG00000000028 0
## ENSMUSG00000000037 0
## ENSMUSG00000000148 0
## ENSMUSG00000000142 0
## ENSMUSG00000000183 0
## ENSMUSG00000000149 0
## ENSMUSG00000000131 2
## ENSMUSG00000000125 0
## ENSMUSG00000000085 1
## ENSMUSG00000000171 1
## ENSMUSG00000000093 0
## ENSMUSG00000000157 0
## ENSMUSG00000000094 0
## ENSMUSG00000000127 0
## ENSMUSG00000000103 0
## ENSMUSG00000000182 0
## ENSMUSG00000000058 0
## ENSMUSG00000000154 0
## ENSMUSG00000000159 14
## ENSMUSG00000000056 0
## ENSMUSG00000000049 0
## ENSMUSG00000000168 3
## ENSMUSG00000000134 0
## ENSMUSG00000000078 0
## ENSMUSG00000000001 0
## ENSMUSG00000000194 0
## ENSMUSG00000000126 0
## ENSMUSG00000000167 0
## ENSMUSG00000000184 0
## ENSMUSG00000000088 2
## ENSMUSG00000000120 0
## ENSMUSG00000000031 0
dim(seurat_campbell_batch_edgeR_chow@raw.data[mito_genes_present, ])
## [1] 32 11255
## Calculate the proportion of mitochondrial gene counts per cell:
## per-cell sum of raw counts over mito_genes_present divided by the per-cell
## sum of raw counts over all genes.
## NOTE: despite its name, percent_mito is a fraction, not a percentage
## (it is never multiplied by 100), so 0.004 corresponds to 0.4%.
percent_mito <- Matrix::colSums(seurat_campbell_batch_edgeR_chow@raw.data[mito_genes_present, ])/Matrix::colSums(seurat_campbell_batch_edgeR_chow@raw.data)
## Basic stats of the proportion of mitochondrial gene expression per cell
summary(percent_mito)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0006089 0.0011086 0.0013382 0.0018298 0.0106698
## Store the per-cell mitochondrial proportion in the metadata as "percent_mito"
seurat_campbell_batch_edgeR_chow <- AddMetaData(
  object = seurat_campbell_batch_edgeR_chow,
  col.name = "percent_mito",
  metadata = percent_mito
)
## Inspect the first rows of the Seurat object metadata
head(seurat_campbell_batch_edgeR_chow@meta.data)
## nGene nUMI orig.ident replicate_name percent_mito
## SRR5164436_AAAAAATGCATG 1158 2043 SRR5164436 SRR5164436 0.0014684288
## SRR5164436_AAAAACACGACG 2171 4531 SRR5164436 SRR5164436 0.0006623979
## SRR5164436_AAAAAGAAAAAT 1286 2154 SRR5164436 SRR5164436 0.0018578727
## SRR5164436_AAAAATGCACTA 3464 8755 SRR5164436 SRR5164436 0.0018289895
## SRR5164436_AAAACACTTCAT 3626 10143 SRR5164436 SRR5164436 0.0022691397
## SRR5164436_AAAACGAACATG 3775 10500 SRR5164436 SRR5164436 0.0012388031
## QC plots of number of genes, UMIs, and % mitochondria
VlnPlot(object = seurat_campbell_batch_edgeR_chow,
features.plot = c("nGene", "nUMI", "percent_mito"),
nCol = 3,
x.lab.rot = TRUE,
point.size.use = 0.2
)
## QC plots to show the relationship between nUMI and relative mitochondrial gene expression or number of genes.
par(mfrow = c(1, 2))
# GenePlot(object = seurat_campbell_batch_edgeR_chow, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_batch_edgeR_chow, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(object = seurat_campbell_batch_edgeR_chow, gene1 = "nUMI", gene2 = "percent_mito", pch.use = 16, cex.use = 0.5)
GenePlot(object = seurat_campbell_batch_edgeR_chow, gene1 = "nUMI", gene2 = "nGene", pch.use = 16, cex.use = 0.5)
Filter out cells with more than 0.4% of total gene expression coming from mitochondrially encoded genes, or with more than 6000 genes expressed.
## manual check; I already know all cells have >800 genes
table(seurat_campbell_batch_edgeR_chow@meta.data$percent_mito < 0.004 & seurat_campbell_batch_edgeR_chow@meta.data$nGene<6000)
##
## FALSE TRUE
## 358 10897
# From: 30000 genes across 11255 samples.
# To:
# FALSE TRUE
# 358 10897
## Keep only cells that pass QC on two metadata columns:
##   nGene:        low threshold 800, high threshold 6000
##   percent_mito: no lower bound (-Inf), high threshold 0.004
## percent_mito is stored as a fraction, so 0.004 corresponds to 0.4%.
## NOTE(review): whether the thresholds are inclusive is decided by FilterCells - confirm if it matters.
seurat_campbell_batch_edgeR_chow <- FilterCells(object = seurat_campbell_batch_edgeR_chow,
subset.names = c("nGene", "percent_mito"),
low.thresholds = c(800, -Inf),
high.thresholds = c(6000, 0.004))
## Print the object summary to check how many cells remain
seurat_campbell_batch_edgeR_chow
## An object of class seurat in project CAMPBELL_CHOW
## 30000 genes across 10897 samples.
# An object of class seurat in project CAMPBELL_CHOW
# 30000 genes across 10897 samples.
# 358 cells are filtered out; numbers consistent with above
## Distribution of per-cell total expression before normalisation
hist(
  colSums(seurat_campbell_batch_edgeR_chow@data),
  main = "Total expression before normalisation",
  xlab = "Sum of expression",
  breaks = 100
)
## Log-normalise gene expression per cell with a scale factor of 1e4
seurat_campbell_batch_edgeR_chow <- NormalizeData(
  object = seurat_campbell_batch_edgeR_chow,
  scale.factor = 1e4,
  normalization.method = "LogNormalize"
)
## Distribution of per-cell total expression after normalisation
hist(
  colSums(seurat_campbell_batch_edgeR_chow@data),
  main = "Total expression after normalisation",
  xlab = "Sum of expression",
  breaks = 100
)
## Inspect the first 10 genes x 10 cells of the normalised data
seurat_campbell_batch_edgeR_chow@data[1:10, 1:10]
## 10 x 10 sparse Matrix of class "dgCMatrix"
## [[ suppressing 10 column names 'SRR5164436_AAAAAATGCATG', 'SRR5164436_AAAAACACGACG', 'SRR5164436_AAAAAGAAAAAT' ... ]]
##
## ENSMUSG00000000001 . . . . . . .
## ENSMUSG00000000028 1.774064 . . . . . .
## ENSMUSG00000000031 . . . . . . .
## ENSMUSG00000000037 . . . . . . .
## ENSMUSG00000000049 . . . . . . .
## ENSMUSG00000000056 . . . . . . .
## ENSMUSG00000000058 . . . . . 1.066726 .
## ENSMUSG00000000078 . . 2.703644 . . 1.904724 .
## ENSMUSG00000000085 . . 1.730714 . 0.6864158 . .
## ENSMUSG00000000088 2.378576 . . 1.189743 1.0896270 1.066726 .
##
## ENSMUSG00000000001 . . .
## ENSMUSG00000000028 . . .
## ENSMUSG00000000031 . . .
## ENSMUSG00000000037 . . .
## ENSMUSG00000000049 . . .
## ENSMUSG00000000056 1.709448 . .
## ENSMUSG00000000058 . . .
## ENSMUSG00000000078 . . .
## ENSMUSG00000000085 . . .
## ENSMUSG00000000088 . 1.160955 .
The paper says removeBatchEffect is an edgeR function, but it is actually a limma function.
# par(mfrow=c(1,2))
# do_PCA(seurat_campbell_batch_edgeR_chow@data, plot_title="before removeBatchEffect()")
## Remove the replicate (batch) effect from the normalised expression values
## and store the corrected values back in the @data slot.
seurat_campbell_batch_edgeR_chow@data <- removeBatchEffect(
  seurat_campbell_batch_edgeR_chow@data,
  batch = seurat_campbell_batch_edgeR_chow@meta.data$replicate_name
)
## Inspect the first 10 genes x 10 cells of the corrected data
seurat_campbell_batch_edgeR_chow@data[1:10, 1:10]
## SRR5164436_AAAAAATGCATG SRR5164436_AAAAACACGACG
## ENSMUSG00000000001 -0.0106040862 -0.0106040862
## ENSMUSG00000000028 1.7742737394 0.0002094769
## ENSMUSG00000000031 -0.0016074305 -0.0016074305
## ENSMUSG00000000037 0.0011305821 0.0011305821
## ENSMUSG00000000049 -0.0011731937 -0.0011731937
## ENSMUSG00000000056 -0.0038975079 -0.0038975079
## ENSMUSG00000000058 -0.0002263577 -0.0002263577
## ENSMUSG00000000078 -0.0071409545 -0.0071409545
## ENSMUSG00000000085 -0.0033954029 -0.0033954029
## ENSMUSG00000000088 2.3764941945 -0.0020815808
## SRR5164436_AAAAAGAAAAAT SRR5164436_AAAAATGCACTA
## ENSMUSG00000000001 -0.0106040862 -0.0106040862
## ENSMUSG00000000028 0.0002094769 0.0002094769
## ENSMUSG00000000031 -0.0016074305 -0.0016074305
## ENSMUSG00000000037 0.0011305821 0.0011305821
## ENSMUSG00000000049 -0.0011731937 -0.0011731937
## ENSMUSG00000000056 -0.0038975079 -0.0038975079
## ENSMUSG00000000058 -0.0002263577 -0.0002263577
## ENSMUSG00000000078 2.6965025860 -0.0071409545
## ENSMUSG00000000085 1.7273184316 -0.0033954029
## ENSMUSG00000000088 -0.0020815808 1.1876615160
## SRR5164436_AAAACACTTCAT SRR5164436_AAAACGAACATG
## ENSMUSG00000000001 -0.0106040862 -0.0106040862
## ENSMUSG00000000028 0.0002094769 0.0002094769
## ENSMUSG00000000031 -0.0016074305 -0.0016074305
## ENSMUSG00000000037 0.0011305821 0.0011305821
## ENSMUSG00000000049 -0.0011731937 -0.0011731937
## ENSMUSG00000000056 -0.0038975079 -0.0038975079
## ENSMUSG00000000058 -0.0002263577 1.0664999199
## ENSMUSG00000000078 -0.0071409545 1.8975829800
## ENSMUSG00000000085 0.6830204120 -0.0033954029
## ENSMUSG00000000088 1.0875454466 1.0646446969
## SRR5164436_AAAACGACTCAA SRR5164436_AAAACGACTCAC
## ENSMUSG00000000001 -0.0106040862 -0.0106040862
## ENSMUSG00000000028 0.0002094769 0.0002094769
## ENSMUSG00000000031 -0.0016074305 -0.0016074305
## ENSMUSG00000000037 0.0011305821 0.0011305821
## ENSMUSG00000000049 -0.0011731937 -0.0011731937
## ENSMUSG00000000056 -0.0038975079 1.7055505837
## ENSMUSG00000000058 -0.0002263577 -0.0002263577
## ENSMUSG00000000078 -0.0071409545 -0.0071409545
## ENSMUSG00000000085 -0.0033954029 -0.0033954029
## ENSMUSG00000000088 -0.0020815808 -0.0020815808
## SRR5164436_AAAACGACTCAG SRR5164436_AAAACGACTCAT
## ENSMUSG00000000001 -0.0106040862 -0.0106040862
## ENSMUSG00000000028 0.0002094769 0.0002094769
## ENSMUSG00000000031 -0.0016074305 -0.0016074305
## ENSMUSG00000000037 0.0011305821 0.0011305821
## ENSMUSG00000000049 -0.0011731937 -0.0011731937
## ENSMUSG00000000056 -0.0038975079 -0.0038975079
## ENSMUSG00000000058 -0.0002263577 -0.0002263577
## ENSMUSG00000000078 -0.0071409545 -0.0071409545
## ENSMUSG00000000085 -0.0033954029 -0.0033954029
## ENSMUSG00000000088 1.1588738384 -0.0020815808
# do_PCA(seurat_campbell_batch_edgeR_chow@data, plot_title="after removeBatchEffect()")
Find genes whose expression varies between cells; these will be used to construct the principal components that are then used for clustering.
## Identify variable genes from their mean expression and dispersion
seurat_campbell_batch_edgeR_chow <- FindVariableGenes(
  object = seurat_campbell_batch_edgeR_chow,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  binning.method = "equal_width",
  num.bin = 20,
  x.low.cutoff = 0.05,
  x.high.cutoff = 4,
  y.cutoff = 0.75
)
## Number of variable genes selected
length(seurat_campbell_batch_edgeR_chow@var.genes)
## [1] 1645
seurat_campbell_batch_edgeR_chow <- FindVariableGenes(object = seurat_campbell_batch_edgeR_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = “equal_width” )
== 1645 variable genes
seurat_campbell_batch_edgeR_chow <- FindVariableGenes(object = seurat_campbell_batch_edgeR_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = “equal_width” )
== 2121 variable genes
seurat_campbell_batch_edgeR_chow <- FindVariableGenes(object = seurat_campbell_batch_edgeR_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.5, num.bin = 20, binning.method = “equal_width” )
== 3463 variable genes
seurat_campbell_batch_edgeR_chow <- FindVariableGenes(object = seurat_campbell_batch_edgeR_chow, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.5, num.bin = 300, binning.method = “equal_width” )
== 2631 variable genes
Scale gene expression per cell by building linear models for nUMI and percent_mito. Mouse replicate is not regressed out here, as it should already have been corrected for using limma (to be confirmed).
## Scale data, regressing out nUMI and percent_mito only.
## Mouse replicate (replicate_name) is not included in vars.to.regress.
seurat_campbell_batch_edgeR_chow <- ScaleData(object = seurat_campbell_batch_edgeR_chow, vars.to.regress = c("nUMI", "percent_mito"))
## Regressing out: nUMI, percent_mito
##
## Time Elapsed: 49.757223367691 secs
## Scaling data matrix
seurat_campbell_batch_edgeR_chow@data[1:10,1:10]
## SRR5164436_AAAAAATGCATG SRR5164436_AAAAACACGACG
## ENSMUSG00000000001 -0.0106040862 -0.0106040862
## ENSMUSG00000000028 1.7742737394 0.0002094769
## ENSMUSG00000000031 -0.0016074305 -0.0016074305
## ENSMUSG00000000037 0.0011305821 0.0011305821
## ENSMUSG00000000049 -0.0011731937 -0.0011731937
## ENSMUSG00000000056 -0.0038975079 -0.0038975079
## ENSMUSG00000000058 -0.0002263577 -0.0002263577
## ENSMUSG00000000078 -0.0071409545 -0.0071409545
## ENSMUSG00000000085 -0.0033954029 -0.0033954029
## ENSMUSG00000000088 2.3764941945 -0.0020815808
## SRR5164436_AAAAAGAAAAAT SRR5164436_AAAAATGCACTA
## ENSMUSG00000000001 -0.0106040862 -0.0106040862
## ENSMUSG00000000028 0.0002094769 0.0002094769
## ENSMUSG00000000031 -0.0016074305 -0.0016074305
## ENSMUSG00000000037 0.0011305821 0.0011305821
## ENSMUSG00000000049 -0.0011731937 -0.0011731937
## ENSMUSG00000000056 -0.0038975079 -0.0038975079
## ENSMUSG00000000058 -0.0002263577 -0.0002263577
## ENSMUSG00000000078 2.6965025860 -0.0071409545
## ENSMUSG00000000085 1.7273184316 -0.0033954029
## ENSMUSG00000000088 -0.0020815808 1.1876615160
## SRR5164436_AAAACACTTCAT SRR5164436_AAAACGAACATG
## ENSMUSG00000000001 -0.0106040862 -0.0106040862
## ENSMUSG00000000028 0.0002094769 0.0002094769
## ENSMUSG00000000031 -0.0016074305 -0.0016074305
## ENSMUSG00000000037 0.0011305821 0.0011305821
## ENSMUSG00000000049 -0.0011731937 -0.0011731937
## ENSMUSG00000000056 -0.0038975079 -0.0038975079
## ENSMUSG00000000058 -0.0002263577 1.0664999199
## ENSMUSG00000000078 -0.0071409545 1.8975829800
## ENSMUSG00000000085 0.6830204120 -0.0033954029
## ENSMUSG00000000088 1.0875454466 1.0646446969
## SRR5164436_AAAACGACTCAA SRR5164436_AAAACGACTCAC
## ENSMUSG00000000001 -0.0106040862 -0.0106040862
## ENSMUSG00000000028 0.0002094769 0.0002094769
## ENSMUSG00000000031 -0.0016074305 -0.0016074305
## ENSMUSG00000000037 0.0011305821 0.0011305821
## ENSMUSG00000000049 -0.0011731937 -0.0011731937
## ENSMUSG00000000056 -0.0038975079 1.7055505837
## ENSMUSG00000000058 -0.0002263577 -0.0002263577
## ENSMUSG00000000078 -0.0071409545 -0.0071409545
## ENSMUSG00000000085 -0.0033954029 -0.0033954029
## ENSMUSG00000000088 -0.0020815808 -0.0020815808
## SRR5164436_AAAACGACTCAG SRR5164436_AAAACGACTCAT
## ENSMUSG00000000001 -0.0106040862 -0.0106040862
## ENSMUSG00000000028 0.0002094769 0.0002094769
## ENSMUSG00000000031 -0.0016074305 -0.0016074305
## ENSMUSG00000000037 0.0011305821 0.0011305821
## ENSMUSG00000000049 -0.0011731937 -0.0011731937
## ENSMUSG00000000056 -0.0038975079 -0.0038975079
## ENSMUSG00000000058 -0.0002263577 -0.0002263577
## ENSMUSG00000000078 -0.0071409545 -0.0071409545
## ENSMUSG00000000085 -0.0033954029 -0.0033954029
## ENSMUSG00000000088 1.1588738384 -0.0020815808
Principal component analysis of variable genes for use in cell clustering.
## Run PCA on the variable genes, printing 5 genes for each of PCs 1-5
seurat_campbell_batch_edgeR_chow <- RunPCA(
  object = seurat_campbell_batch_edgeR_chow,
  pc.genes = seurat_campbell_batch_edgeR_chow@var.genes,
  pcs.print = 1:5,
  genes.print = 5,
  do.print = TRUE
)
## [1] "PC1"
## [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
## [4] "ENSMUSG00000026701" "ENSMUSG00000000567"
## [1] ""
## [1] "ENSMUSG00000050711" "ENSMUSG00000027350" "ENSMUSG00000027273"
## [4] "ENSMUSG00000024261" "ENSMUSG00000000159"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000020077" "ENSMUSG00000036905" "ENSMUSG00000060802"
## [4] "ENSMUSG00000036896" "ENSMUSG00000036887"
## [1] ""
## [1] "ENSMUSG00000067786" "ENSMUSG00000055254" "ENSMUSG00000031760"
## [4] "ENSMUSG00000037852" "ENSMUSG00000026701"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000032854"
## [4] "ENSMUSG00000076439" "ENSMUSG00000036634"
## [1] ""
## [1] "ENSMUSG00000027447" "ENSMUSG00000036896" "ENSMUSG00000036887"
## [4] "ENSMUSG00000036905" "ENSMUSG00000030579"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000079018" "ENSMUSG00000022584" "ENSMUSG00000036256"
## [4] "ENSMUSG00000056492" "ENSMUSG00000075602"
## [1] ""
## [1] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000036905"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000022132" "ENSMUSG00000058897" "ENSMUSG00000033737"
## [4] "ENSMUSG00000007682" "ENSMUSG00000034810"
## [1] ""
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000038370"
## [4] "ENSMUSG00000027800" "ENSMUSG00000094800"
## [1] ""
## [1] ""
## Visualise the top genes associated with principal components 1 to 9
VizPCA(
  object = seurat_campbell_batch_edgeR_chow,
  pcs.use = 1:9
)
## Plot principal component 1 vs 2
PCAPlot(
  object = seurat_campbell_batch_edgeR_chow,
  dim.1 = 1,
  dim.2 = 2
)
## Plot principal component 2 vs 3
PCAPlot(
  object = seurat_campbell_batch_edgeR_chow,
  dim.1 = 2,
  dim.2 = 3
)
## Heat map of gene expression for the genes of principal component 1
## (cells.use = 500, balanced gene selection, column labels hidden)
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 1,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heat map of gene expression for the genes of principal component 2
## (cells.use = 500, balanced gene selection, column labels hidden)
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 2,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heat map of gene expression for the genes of principal component 3
## (cells.use = 500, balanced gene selection, column labels hidden)
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 3,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heat map of gene expression for the genes of principal component 4
## (cells.use = 500, balanced gene selection, column labels hidden)
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 4,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heat map of gene expression for the genes of principal component 5
## (cells.use = 500, balanced gene selection, column labels hidden)
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 5,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heat maps of gene expression for principal components 6 to 20 in one call
PCHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  pc.use = 6:20,
  label.columns = FALSE,
  do.balanced = TRUE,
  cells.use = 500
)
## Project the pre-computed PCA (built from the most variable genes identified
## earlier) onto the entire dataset (all genes) and print the result.
seurat_campbell_batch_edgeR_chow <- ProjectPCA(
  object = seurat_campbell_batch_edgeR_chow,
  do.print = TRUE
)
## [1] "PC1"
## [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
## [4] "ENSMUSG00000026701" "ENSMUSG00000000567" "ENSMUSG00000050953"
## [7] "ENSMUSG00000018593" "ENSMUSG00000001025" "ENSMUSG00000031762"
## [10] "ENSMUSG00000035805" "ENSMUSG00000026728" "ENSMUSG00000067786"
## [13] "ENSMUSG00000095538" "ENSMUSG00000022528" "ENSMUSG00000058135"
## [16] "ENSMUSG00000005360" "ENSMUSG00000018102" "ENSMUSG00000027712"
## [19] "ENSMUSG00000029838" "ENSMUSG00000021250" "ENSMUSG00000030342"
## [22] "ENSMUSG00000032231" "ENSMUSG00000034467" "ENSMUSG00000008540"
## [25] "ENSMUSG00000053931" "ENSMUSG00000036570" "ENSMUSG00000026649"
## [28] "ENSMUSG00000063564" "ENSMUSG00000055254" "ENSMUSG00000017009"
## [1] ""
## [1] "ENSMUSG00000033061" "ENSMUSG00000050711" "ENSMUSG00000047261"
## [4] "ENSMUSG00000027581" "ENSMUSG00000055430" "ENSMUSG00000019986"
## [7] "ENSMUSG00000026576" "ENSMUSG00000024268" "ENSMUSG00000021087"
## [10] "ENSMUSG00000025468" "ENSMUSG00000043388" "ENSMUSG00000040785"
## [13] "ENSMUSG00000042750" "ENSMUSG00000027500" "ENSMUSG00000029223"
## [16] "ENSMUSG00000022577" "ENSMUSG00000036699" "ENSMUSG00000027350"
## [19] "ENSMUSG00000059361" "ENSMUSG00000019923" "ENSMUSG00000044349"
## [22] "ENSMUSG00000035964" "ENSMUSG00000027273" "ENSMUSG00000018965"
## [25] "ENSMUSG00000071658" "ENSMUSG00000039278" "ENSMUSG00000024261"
## [28] "ENSMUSG00000031840" "ENSMUSG00000024423" "ENSMUSG00000000159"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000020077" "ENSMUSG00000036905" "ENSMUSG00000060802"
## [4] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000079018"
## [7] "ENSMUSG00000075602" "ENSMUSG00000029484" "ENSMUSG00000030579"
## [10] "ENSMUSG00000028581" "ENSMUSG00000058715" "ENSMUSG00000036256"
## [13] "ENSMUSG00000026365" "ENSMUSG00000029622" "ENSMUSG00000022584"
## [16] "ENSMUSG00000024621" "ENSMUSG00000064373" "ENSMUSG00000024397"
## [19] "ENSMUSG00000056492" "ENSMUSG00000021423" "ENSMUSG00000023992"
## [22] "ENSMUSG00000015852" "ENSMUSG00000038642" "ENSMUSG00000001123"
## [25] "ENSMUSG00000040584" "ENSMUSG00000020154" "ENSMUSG00000016494"
## [28] "ENSMUSG00000041378" "ENSMUSG00000046805" "ENSMUSG00000030237"
## [1] ""
## [1] "ENSMUSG00000018451" "ENSMUSG00000021270" "ENSMUSG00000067786"
## [4] "ENSMUSG00000055254" "ENSMUSG00000026223" "ENSMUSG00000031428"
## [7] "ENSMUSG00000052727" "ENSMUSG00000037926" "ENSMUSG00000039278"
## [10] "ENSMUSG00000101111" "ENSMUSG00000031760" "ENSMUSG00000037852"
## [13] "ENSMUSG00000031633" "ENSMUSG00000026701" "ENSMUSG00000055430"
## [16] "ENSMUSG00000021379" "ENSMUSG00000019986" "ENSMUSG00000040785"
## [19] "ENSMUSG00000035805" "ENSMUSG00000046432" "ENSMUSG00000017390"
## [22] "ENSMUSG00000042750" "ENSMUSG00000034467" "ENSMUSG00000079037"
## [25] "ENSMUSG00000025666" "ENSMUSG00000015222" "ENSMUSG00000021087"
## [28] "ENSMUSG00000095538" "ENSMUSG00000026649" "ENSMUSG00000064357"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000032854"
## [4] "ENSMUSG00000076439" "ENSMUSG00000036634" "ENSMUSG00000026830"
## [7] "ENSMUSG00000031775" "ENSMUSG00000006782" "ENSMUSG00000046160"
## [10] "ENSMUSG00000032060" "ENSMUSG00000050121" "ENSMUSG00000033579"
## [13] "ENSMUSG00000022425" "ENSMUSG00000032517" "ENSMUSG00000027375"
## [16] "ENSMUSG00000041607" "ENSMUSG00000020486" "ENSMUSG00000027562"
## [19] "ENSMUSG00000040759" "ENSMUSG00000027858" "ENSMUSG00000013523"
## [22] "ENSMUSG00000073680" "ENSMUSG00000022090" "ENSMUSG00000026888"
## [25] "ENSMUSG00000037166" "ENSMUSG00000027199" "ENSMUSG00000043448"
## [28] "ENSMUSG00000028412" "ENSMUSG00000011884" "ENSMUSG00000090996"
## [1] ""
## [1] "ENSMUSG00000027447" "ENSMUSG00000036896" "ENSMUSG00000036887"
## [4] "ENSMUSG00000036905" "ENSMUSG00000018451" "ENSMUSG00000030579"
## [7] "ENSMUSG00000058715" "ENSMUSG00000028581" "ENSMUSG00000024621"
## [10] "ENSMUSG00000024397" "ENSMUSG00000023992" "ENSMUSG00000021423"
## [13] "ENSMUSG00000022587" "ENSMUSG00000015852" "ENSMUSG00000038642"
## [16] "ENSMUSG00000026576" "ENSMUSG00000046805" "ENSMUSG00000036353"
## [19] "ENSMUSG00000008682" "ENSMUSG00000021268" "ENSMUSG00000048163"
## [22] "ENSMUSG00000040747" "ENSMUSG00000060802" "ENSMUSG00000059498"
## [25] "ENSMUSG00000050711" "ENSMUSG00000030786" "ENSMUSG00000021665"
## [28] "ENSMUSG00000033061" "ENSMUSG00000030844" "ENSMUSG00000000682"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000079018" "ENSMUSG00000022584" "ENSMUSG00000036256"
## [4] "ENSMUSG00000056492" "ENSMUSG00000075602" "ENSMUSG00000040584"
## [7] "ENSMUSG00000020154" "ENSMUSG00000041378" "ENSMUSG00000030237"
## [10] "ENSMUSG00000030235" "ENSMUSG00000061353" "ENSMUSG00000029648"
## [13] "ENSMUSG00000039167" "ENSMUSG00000001946" "ENSMUSG00000033960"
## [16] "ENSMUSG00000031239" "ENSMUSG00000039349" "ENSMUSG00000024140"
## [19] "ENSMUSG00000020717" "ENSMUSG00000006386" "ENSMUSG00000114487"
## [22] "ENSMUSG00000042116" "ENSMUSG00000031871" "ENSMUSG00000027435"
## [25] "ENSMUSG00000019966" "ENSMUSG00000034738" "ENSMUSG00000062960"
## [28] "ENSMUSG00000045954" "ENSMUSG00000040732" "ENSMUSG00000039831"
## [1] ""
## [1] "ENSMUSG00000036896" "ENSMUSG00000036887" "ENSMUSG00000036905"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715" "ENSMUSG00000024621"
## [7] "ENSMUSG00000021423" "ENSMUSG00000028581" "ENSMUSG00000024397"
## [10] "ENSMUSG00000038642" "ENSMUSG00000023992" "ENSMUSG00000015852"
## [13] "ENSMUSG00000036353" "ENSMUSG00000046805" "ENSMUSG00000048163"
## [16] "ENSMUSG00000030786" "ENSMUSG00000059498" "ENSMUSG00000040747"
## [19] "ENSMUSG00000000682" "ENSMUSG00000021665" "ENSMUSG00000052160"
## [22] "ENSMUSG00000040229" "ENSMUSG00000036908" "ENSMUSG00000018008"
## [25] "ENSMUSG00000030844" "ENSMUSG00000069516" "ENSMUSG00000044811"
## [28] "ENSMUSG00000020377" "ENSMUSG00000089929" "ENSMUSG00000052336"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000022132" "ENSMUSG00000058897" "ENSMUSG00000033737"
## [4] "ENSMUSG00000007682" "ENSMUSG00000034810" "ENSMUSG00000055653"
## [7] "ENSMUSG00000029838" "ENSMUSG00000022528" "ENSMUSG00000045005"
## [10] "ENSMUSG00000063564" "ENSMUSG00000030629" "ENSMUSG00000022816"
## [13] "ENSMUSG00000026185" "ENSMUSG00000017493" "ENSMUSG00000052387"
## [16] "ENSMUSG00000047786" "ENSMUSG00000024518" "ENSMUSG00000021250"
## [19] "ENSMUSG00000017390" "ENSMUSG00000064351" "ENSMUSG00000022419"
## [22] "ENSMUSG00000037206" "ENSMUSG00000030111" "ENSMUSG00000064370"
## [25] "ENSMUSG00000034640" "ENSMUSG00000008540" "ENSMUSG00000027239"
## [28] "ENSMUSG00000061718" "ENSMUSG00000093460" "ENSMUSG00000038418"
## [1] ""
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000038370"
## [4] "ENSMUSG00000027800" "ENSMUSG00000094800" "ENSMUSG00000044772"
## [7] "ENSMUSG00000027744" "ENSMUSG00000032595" "ENSMUSG00000072674"
## [10] "ENSMUSG00000110332" "ENSMUSG00000045655" "ENSMUSG00000041323"
## [13] "ENSMUSG00000047139" "ENSMUSG00000108841" "ENSMUSG00000095304"
## [16] "ENSMUSG00000022037" "ENSMUSG00000047394" "ENSMUSG00000029182"
## [19] "ENSMUSG00000046242" "ENSMUSG00000072473" "ENSMUSG00000047671"
## [22] "ENSMUSG00000044475" "ENSMUSG00000027360" "ENSMUSG00000020473"
## [25] "ENSMUSG00000028441" "ENSMUSG00000026683" "ENSMUSG00000038135"
## [28] "ENSMUSG00000026301" "ENSMUSG00000091345" "ENSMUSG00000027867"
## [1] ""
## [1] ""
## Run the JackStraw test (100 replicates, progress bar shown) to investigate
## which principal components are statistically significant.
seurat_campbell_batch_edgeR_chow <- JackStraw(
  object = seurat_campbell_batch_edgeR_chow,
  display.progress = TRUE,
  num.replicate = 100
)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 20%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|==================== | 31%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 40%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
## Time Elapsed: 6.37038615544637 mins
## Visualise the JackStraw results for PCs 1 to 20
## (20 is the maximum number of PCs allowed).
JackStrawPlot(
  object = seurat_campbell_batch_edgeR_chow,
  PCs = 1:20
)
## Warning: Removed 23020 rows containing missing values (geom_point).
## An object of class seurat in project CAMPBELL_CHOW
## 30000 genes across 10897 samples.
## Elbow plot: a less computationally intensive heuristic than JackStraw for
## choosing how many principal components to keep.
PCElbowPlot(
  object = seurat_campbell_batch_edgeR_chow
)
Using the limma batch-corrected data, 20 PCs are still significant.
## Cluster cells on PCs 1 to 20 at resolution 0.6, saving the SNN graph and
## forcing recalculation, then print the stored clustering parameters.
seurat_campbell_batch_edgeR_chow <- FindClusters(
  object = seurat_campbell_batch_edgeR_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
PrintFindClustersParams(
  object = seurat_campbell_batch_edgeR_chow
)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 20:32:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCs 1 to 20
seurat_campbell_batch_edgeR_chow <- RunTSNE(
  object = seurat_campbell_batch_edgeR_chow,
  dims.use = 1:20,
  do.fast = TRUE
)
## Labelled t-SNE plot without a legend
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  do.label = TRUE,
  no.legend = TRUE
)
## 1645 variable genes = 19 clusters
## Tabulate the number of cells per cluster identity
table(seurat_campbell_batch_edgeR_chow@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 3612 1755 1398 613 553 513 343 323 303 263 250 210 188 165 135
## 15 16 17 18
## 107 64 58 44
## Cross-tabulate cluster identity against replicate, convert each replicate
## column to proportions (margin = 2) and round to 5 decimal places.
proportion_table <- table(
  seurat_campbell_batch_edgeR_chow@ident,
  seurat_campbell_batch_edgeR_chow@meta.data$replicate_name
) %>%
  prop.table(margin = 2) %>%
  round(digits = 5)
proportion_table
##
## SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
## 0 0.28433 0.41417 0.27423 0.43832 0.36663
## 1 0.16553 0.12386 0.23042 0.09974 0.08438
## 2 0.11795 0.14759 0.12349 0.17585 0.10184
## 3 0.06838 0.03251 0.06830 0.03150 0.06111
## 4 0.04615 0.05202 0.05795 0.03412 0.04850
## 5 0.03989 0.05657 0.02415 0.03675 0.11154
## 6 0.04046 0.02341 0.03829 0.00787 0.01455
## 7 0.02764 0.02991 0.02932 0.03675 0.03395
## 8 0.04330 0.01528 0.03277 0.01050 0.00485
## 9 0.02507 0.02568 0.01656 0.04199 0.03104
## 10 0.02707 0.02438 0.02346 0.00787 0.00873
## 11 0.03048 0.00748 0.02518 0.00262 0.00582
## 12 0.02137 0.01073 0.01621 0.01575 0.02619
## 13 0.01880 0.01040 0.01069 0.02100 0.02716
## 14 0.01425 0.01300 0.01345 0.01575 0.00000
## 15 0.00712 0.00260 0.00000 0.02100 0.06402
## 16 0.00969 0.00423 0.00379 0.00262 0.00485
## 17 0.00627 0.00488 0.00586 0.00000 0.00388
## 18 0.00627 0.00130 0.00586 0.00000 0.00097
## t-SNE plot grouped by replicate, with a legend and no labels, to inspect
## batch effects visually.
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)
## Correlate per-cluster cell proportions between replicates.
## as.data.frame.matrix() keeps one named column per replicate for any number
## of replicates (previously hard-coded as ncol = 5). Every cluster (row) is
## passed to the chart; the earlier `[1:5, ]` subset kept only the first five
## clusters, so the correlations ignored the remaining ones.
proportion_table <- as.data.frame.matrix(proportion_table)
PerformanceAnalytics::chart.Correlation(proportion_table)
#######
## Re-cluster cells on PCs 1 to 20 at resolution 1.0, saving the SNN graph and
## forcing recalculation, then print the stored clustering parameters.
seurat_campbell_batch_edgeR_chow <- FindClusters(
  object = seurat_campbell_batch_edgeR_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
PrintFindClustersParams(
  object = seurat_campbell_batch_edgeR_chow
)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 20:32:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCs 1 to 20
seurat_campbell_batch_edgeR_chow <- RunTSNE(
  object = seurat_campbell_batch_edgeR_chow,
  dims.use = 1:20,
  do.fast = TRUE
)
## Labelled t-SNE plot without a legend
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  do.label = TRUE,
  no.legend = TRUE
)
## 1645 variable genes = 20 clusters
## Tabulate the number of cells per cluster identity
table(seurat_campbell_batch_edgeR_chow@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 2075 1730 1483 1397 615 593 529 343 322 303 286 250 210 188 165
## 15 16 17 18 19
## 135 107 64 58 44
## Cross-tabulate cluster identity against replicate, convert each replicate
## column to proportions (margin = 2) and round to 5 decimal places.
proportion_table <- table(
  seurat_campbell_batch_edgeR_chow@ident,
  seurat_campbell_batch_edgeR_chow@meta.data$replicate_name
) %>%
  prop.table(margin = 2) %>%
  round(digits = 5)
proportion_table
##
## SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
## 0 0.11140 0.34590 0.08037 0.38845 0.23181
## 1 0.16439 0.12126 0.22732 0.09974 0.08050
## 2 0.16752 0.06307 0.18800 0.05249 0.13191
## 3 0.11795 0.14759 0.12315 0.17585 0.10184
## 4 0.06838 0.03251 0.06864 0.03150 0.06208
## 5 0.04957 0.05754 0.06106 0.03412 0.05044
## 6 0.04188 0.05624 0.02760 0.03412 0.11251
## 7 0.04046 0.02341 0.03829 0.00787 0.01455
## 8 0.02764 0.02991 0.02898 0.03675 0.03395
## 9 0.04330 0.01528 0.03277 0.01050 0.00485
## 10 0.02621 0.02828 0.01932 0.04199 0.03395
## 11 0.02707 0.02438 0.02346 0.00787 0.00873
## 12 0.03048 0.00748 0.02518 0.00262 0.00582
## 13 0.02137 0.01073 0.01621 0.01575 0.02619
## 14 0.01880 0.01040 0.01069 0.02100 0.02716
## 15 0.01425 0.01300 0.01345 0.01575 0.00000
## 16 0.00712 0.00260 0.00000 0.02100 0.06402
## 17 0.00969 0.00423 0.00379 0.00262 0.00485
## 18 0.00627 0.00488 0.00586 0.00000 0.00388
## 19 0.00627 0.00130 0.00586 0.00000 0.00097
## t-SNE plot grouped by replicate, with a legend and no labels, to inspect
## batch effects visually.
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)
## Correlate per-cluster cell proportions between replicates.
## as.data.frame.matrix() keeps one named column per replicate for any number
## of replicates (previously hard-coded as ncol = 5). Every cluster (row) is
## passed to the chart; the earlier `[1:5, ]` subset kept only the first five
## clusters, so the correlations ignored the remaining ones.
proportion_table <- as.data.frame.matrix(proportion_table)
PerformanceAnalytics::chart.Correlation(proportion_table)
#######
## Re-cluster cells on PCs 1 to 20 at resolution 1.5, saving the SNN graph and
## forcing recalculation, then print the stored clustering parameters.
seurat_campbell_batch_edgeR_chow <- FindClusters(
  object = seurat_campbell_batch_edgeR_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 1.5,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
PrintFindClustersParams(
  object = seurat_campbell_batch_edgeR_chow
)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 20:32:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCs 1 to 20
seurat_campbell_batch_edgeR_chow <- RunTSNE(
  object = seurat_campbell_batch_edgeR_chow,
  dims.use = 1:20,
  do.fast = TRUE
)
## Labelled t-SNE plot without a legend
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  do.label = TRUE,
  no.legend = TRUE
)
## 1645 variable genes = 24 clusters
## Tabulate the number of cells per cluster identity
table(seurat_campbell_batch_edgeR_chow@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 1741 1071 934 763 742 622 609 607 600 547 343 328 303 256 250
## 15 16 17 18 19 20 21 22 23
## 210 209 188 165 136 107 64 58 44
## Cross-tabulate cluster identity against replicate, convert each replicate
## column to proportions (margin = 2) and round to 5 decimal places.
proportion_table <- table(
  seurat_campbell_batch_edgeR_chow@ident,
  seurat_campbell_batch_edgeR_chow@meta.data$replicate_name
) %>%
  prop.table(margin = 2) %>%
  round(digits = 5)
proportion_table
##
## SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
## 0 0.09345 0.29746 0.06278 0.30446 0.19399
## 1 0.10940 0.06534 0.14384 0.05249 0.04753
## 2 0.12479 0.00878 0.14453 0.01575 0.04268
## 3 0.03989 0.11834 0.03518 0.16273 0.09214
## 4 0.05983 0.06404 0.09314 0.06037 0.04074
## 5 0.07778 0.02731 0.08658 0.01312 0.00873
## 6 0.06781 0.03218 0.06795 0.03150 0.06111
## 7 0.04644 0.06144 0.04795 0.04462 0.09602
## 8 0.05043 0.05787 0.06209 0.03412 0.05044
## 9 0.04217 0.05819 0.02863 0.03937 0.11833
## 10 0.04046 0.02341 0.03829 0.00787 0.01455
## 11 0.02764 0.03088 0.02967 0.03675 0.03492
## 12 0.04330 0.01528 0.03277 0.01050 0.00485
## 13 0.01339 0.03999 0.01138 0.07087 0.02522
## 14 0.02707 0.02438 0.02346 0.00787 0.00873
## 15 0.03048 0.00748 0.02518 0.00262 0.00582
## 16 0.02194 0.02048 0.01035 0.02887 0.02716
## 17 0.02137 0.01073 0.01621 0.01575 0.02619
## 18 0.01880 0.01040 0.01069 0.02100 0.02716
## 19 0.01425 0.01300 0.01380 0.01575 0.00000
## 20 0.00712 0.00260 0.00000 0.02100 0.06402
## 21 0.00969 0.00423 0.00379 0.00262 0.00485
## 22 0.00627 0.00488 0.00586 0.00000 0.00388
## 23 0.00627 0.00130 0.00586 0.00000 0.00097
## t-SNE plot grouped by replicate, with a legend and no labels, to inspect
## batch effects visually.
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)
## Correlate per-cluster cell proportions between replicates.
## as.data.frame.matrix() keeps one named column per replicate for any number
## of replicates (previously hard-coded as ncol = 5). Every cluster (row) is
## passed to the chart; the earlier `[1:5, ]` subset kept only the first five
## clusters, so the correlations ignored the remaining ones.
proportion_table <- as.data.frame.matrix(proportion_table)
PerformanceAnalytics::chart.Correlation(proportion_table)
#######
## Re-cluster cells on PCs 1 to 20 at resolution 2.0, saving the SNN graph and
## forcing recalculation, then print the stored clustering parameters.
seurat_campbell_batch_edgeR_chow <- FindClusters(
  object = seurat_campbell_batch_edgeR_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 2.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
PrintFindClustersParams(
  object = seurat_campbell_batch_edgeR_chow
)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 20:32:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from PCs 1 to 20
seurat_campbell_batch_edgeR_chow <- RunTSNE(
  object = seurat_campbell_batch_edgeR_chow,
  dims.use = 1:20,
  do.fast = TRUE
)
## Labelled t-SNE plot without a legend
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  do.label = TRUE,
  no.legend = TRUE
)
## 1645 variable genes = 29 clusters
## Tabulate the number of cells per cluster identity
table(seurat_campbell_batch_edgeR_chow@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 924 902 880 870 758 648 622 613 605 501 343 325 310 303 290 286 273 250
## 18 19 20 21 22 23 24 25 26 27 28
## 222 188 170 136 110 107 64 58 55 44 40
## Cross-tabulate cluster identity against replicate, convert each replicate
## column to proportions (margin = 2) and round to 5 decimal places.
proportion_table <- table(
  seurat_campbell_batch_edgeR_chow@ident,
  seurat_campbell_batch_edgeR_chow@meta.data$replicate_name
) %>%
  prop.table(margin = 2) %>%
  round(digits = 5)
proportion_table
##
## SRR5164436 SRR5164437 SRR5164438 SRR5164443 SRR5164445
## 0 0.12251 0.00975 0.14315 0.01837 0.04074
## 1 0.05071 0.15085 0.03587 0.12861 0.10378
## 2 0.04672 0.14824 0.03036 0.18373 0.09893
## 3 0.08405 0.05559 0.12660 0.03412 0.02328
## 4 0.03960 0.11736 0.03518 0.16273 0.09117
## 5 0.06382 0.04909 0.07347 0.03937 0.04365
## 6 0.07778 0.02731 0.08658 0.01312 0.00873
## 7 0.04701 0.06339 0.04795 0.03937 0.09602
## 8 0.06695 0.03218 0.06761 0.03150 0.06111
## 9 0.03989 0.05267 0.02518 0.02887 0.11154
## 10 0.04046 0.02341 0.03829 0.00787 0.01455
## 11 0.02764 0.03023 0.02932 0.03675 0.03492
## 12 0.02593 0.02893 0.03484 0.02362 0.01940
## 13 0.04330 0.01528 0.03277 0.01050 0.00485
## 14 0.02422 0.02893 0.02691 0.01050 0.03298
## 15 0.02621 0.02828 0.01932 0.04199 0.03395
## 16 0.01396 0.04259 0.01345 0.07612 0.02425
## 17 0.02707 0.02438 0.02346 0.00787 0.00873
## 18 0.01795 0.01691 0.02829 0.02625 0.01455
## 19 0.02137 0.01073 0.01621 0.01575 0.02619
## 20 0.02593 0.00423 0.02035 0.00262 0.00582
## 21 0.01425 0.01300 0.01380 0.01575 0.00000
## 22 0.01026 0.00975 0.00586 0.01312 0.02134
## 23 0.00712 0.00260 0.00000 0.02100 0.06402
## 24 0.00969 0.00423 0.00379 0.00262 0.00485
## 25 0.00627 0.00488 0.00586 0.00000 0.00388
## 26 0.00855 0.00065 0.00483 0.00787 0.00582
## 27 0.00627 0.00130 0.00586 0.00000 0.00097
## 28 0.00456 0.00325 0.00483 0.00000 0.00000
## t-SNE plot grouped by replicate, with a legend and no labels, to inspect
## batch effects visually.
TSNEPlot(
  object = seurat_campbell_batch_edgeR_chow,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)
# install.packages("PerformanceAnalytics")
## Correlate per-cluster cell proportions between replicates.
## as.data.frame.matrix() keeps one named column per replicate for any number
## of replicates (previously hard-coded as ncol = 5). Every cluster (row) is
## passed to the chart; the earlier `[1:5, ]` subset kept only the first five
## clusters, so the correlations ignored the remaining ones.
proportion_table <- as.data.frame.matrix(proportion_table)
PerformanceAnalytics::chart.Correlation(proportion_table)
# cor(proportion_table[1],proportion_table[4])
## Final clustering: 1645 variable genes, 20 PCs, resolution = 0.6
seurat_campbell_batch_edgeR_chow <- FindClusters(
  object = seurat_campbell_batch_edgeR_chow,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## Compute the t-SNE embedding from PCs 1 to 20
seurat_campbell_batch_edgeR_chow <- RunTSNE(
  object = seurat_campbell_batch_edgeR_chow,
  dims.use = 1:20,
  do.fast = TRUE
)
Use 1645 genes, 20 principal components and a resolution of 0.6 to give 19 individual clusters. Although there is good correlation between the proportion of cells in each cluster from each experimental batch using a resolution of 0.6, batch effects are still visible. Therefore, the results of using the limma batch correction feature are similar to those of the Seurat v2.0 ScaleData batch correction (maybe because both use a linear model), so there is no benefit to using the limma feature as in the Campbell paper.
## Find markers for every cluster compared with all remaining cells, keeping
## both positive and negative genes (only.pos = FALSE, min.pct = 0.2).
seurat_campbell_batch_edgeR_chow_biomarkers <- FindAllMarkers(
  object = seurat_campbell_batch_edgeR_chow,
  min.pct = 0.2,
  only.pos = FALSE
)
## Within each cluster, keep the top 10 markers ranked by avg_logFC
top10_seurat_campbell_markers <- top_n(
  group_by(seurat_campbell_batch_edgeR_chow_biomarkers, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers
## # A tibble: 190 x 7
## # Groups: cluster [19]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0. 1.01 0.948 0.842 0. 0 ENSMUSG00000027500
## 2 0. 0.993 0.976 0.875 0. 0 ENSMUSG00000047261
## 3 0. 0.915 0.995 0.903 0. 0 ENSMUSG00000055430
## 4 6.76e-261 0.865 0.779 0.562 2.03e-256 0 ENSMUSG00000048978
## 5 3.10e-256 0.913 0.946 0.921 9.31e-252 0 ENSMUSG00000066392
## 6 4.94e- 75 0.960 0.686 0.683 1.48e- 70 0 ENSMUSG00000036357
## 7 1.47e- 50 1.56 0.744 0.763 4.40e- 46 0 ENSMUSG00000021647
## 8 4.69e- 13 2.48 0.697 0.756 1.41e- 8 0 ENSMUSG00000020660
## 9 1.27e- 12 1.56 0.54 0.474 3.81e- 8 0 ENSMUSG00000032291
## 10 1.26e- 4 1.41 0.858 0.895 1.00e+ 0 0 ENSMUSG00000037727
## # ... with 180 more rows
## Export the full marker table and the per-cluster top 10 as CSV
write.csv(
  as.data.frame(seurat_campbell_batch_edgeR_chow_biomarkers),
  file = "seurat_campbell_batch_edgeR_chow_biomarkers.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers),
  file = "top10_seurat_campbell_batch_edgeR_chow_biomarkers.csv",
  quote = FALSE
)
## Repeat the marker search with the ROC test. This can take a long time.
seurat_campbell_batch_edgeR_chow_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_batch_edgeR_chow,
  only.pos = FALSE,
  min.pct = 0.2,
  test.use = "roc"
)
top10_seurat_campbell_markers_ROC <- seurat_campbell_batch_edgeR_chow_biomarkers_ROC %>%
  group_by(cluster) %>%
  top_n(10, avg_logFC)
top10_seurat_campbell_markers_ROC
## # A tibble: 190 x 9
## # Groups: cluster [19]
## myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0.82 0.915 0.640 0.915 0.995 0.903 NA 0 ENSMUSG00…
## 2 0.808 0.993 0.616 0.993 0.976 0.875 NA 0 ENSMUSG00…
## 3 0.8 0.799 0.6 0.799 0.993 0.89 NA 0 ENSMUSG00…
## 4 0.787 1.01 0.574 1.01 0.948 0.842 NA 0 ENSMUSG00…
## 5 0.76 0.762 0.52 0.762 0.979 0.892 NA 0 ENSMUSG00…
## 6 0.733 0.749 0.466 0.749 0.939 0.855 NA 0 ENSMUSG00…
## 7 0.73 0.776 0.46 0.776 0.916 0.84 NA 0 ENSMUSG00…
## 8 0.721 0.796 0.442 0.796 0.991 0.953 NA 0 ENSMUSG00…
## 9 0.714 0.779 0.428 0.779 0.962 0.938 NA 0 ENSMUSG00…
## 10 0.701 0.865 0.402 0.865 0.779 0.562 NA 0 ENSMUSG00…
## # ... with 180 more rows
## Export the ROC marker table and its per-cluster top 10 as CSV
write.csv(
  as.data.frame(seurat_campbell_batch_edgeR_chow_biomarkers_ROC),
  file = "seurat_campbell_batch_edgeR_chow_biomarkers_ROC.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers_ROC),
  file = "top10_seurat_campbell_batch_edgeR_chow_biomarkers_ROC.csv",
  quote = FALSE
)
## Heatmap of the top 10 DE genes per cluster (from the default-test table)
DoHeatmap(
  object = seurat_campbell_batch_edgeR_chow,
  genes.use = top10_seurat_campbell_markers$gene,
  remove.key = TRUE,
  slim.col.label = TRUE
)
## Persist the finished chow object to disk
saveRDS(
  object = seurat_campbell_batch_edgeR_chow,
  file = "./seurat_campbell_batch_edgeR_chow_final.rds"
)
## Read the fasted object from disk and print its summary
seurat_campbell_fasted <- readRDS(
  file = "./seurat_campbell_fasted_just_created.rds"
)
seurat_campbell_fasted
## An object of class seurat in project CAMPBELL_FASTED
## 21789 genes across 3783 samples.
## Import the table of mouse mitochondrially encoded genes
mito_genes <- read.csv("mito_genes_table.csv", header = TRUE,
                       stringsAsFactors = FALSE)
## Keep the gene IDs as a plain character vector. With R < 4.0 read.csv()
## returns strings as factors, and indexing a matrix with a factor selects rows
## by the factor's integer codes instead of by name. The previously rendered
## output of this chunk listed IDs such as ENSMUSG00000000028, which is
## consistent with rows having been picked by position rather than by gene ID.
mito_genes <- as.character(mito_genes$id_with_url)
## Keep only the mitochondrial gene IDs that are row names of the raw matrix
mito_genes_present <- mito_genes[
  mito_genes %in% rownames(seurat_campbell_fasted@raw.data)
]
if (length(mito_genes_present) == 0) {
  stop("None of the IDs in mito_genes_table.csv match the row names of raw.data",
       call. = FALSE)
}
## NOTE(review): percent_mito and the 0.005 cut-off used for filtering further
## down were derived from the earlier selection; re-inspect the QC plots and
## re-choose the threshold after this correction.
seurat_campbell_fasted@raw.data[mito_genes_present, 1:5]
## SRR5164441_AAAAAGGGATGC SRR5164441_AAAAATTCGGGC
## ENSMUSG00000000028 0 0
## ENSMUSG00000000037 0 0
## ENSMUSG00000000159 1 3
## ENSMUSG00000000149 0 0
## ENSMUSG00000000223 0 0
## ENSMUSG00000000167 0 0
## ENSMUSG00000000142 0 0
## ENSMUSG00000000127 0 0
## ENSMUSG00000000085 0 0
## ENSMUSG00000000202 0 0
## ENSMUSG00000000093 0 0
## ENSMUSG00000000171 0 0
## ENSMUSG00000000120 0 0
## ENSMUSG00000000134 0 0
## ENSMUSG00000000125 0 0
## ENSMUSG00000000214 0 0
## ENSMUSG00000000058 0 0
## ENSMUSG00000000168 0 0
## ENSMUSG00000000184 0 0
## ENSMUSG00000000056 0 0
## ENSMUSG00000000049 0 0
## ENSMUSG00000000197 0 1
## ENSMUSG00000000148 0 0
## ENSMUSG00000000078 0 0
## ENSMUSG00000000001 0 0
## ENSMUSG00000000247 0 0
## ENSMUSG00000000131 0 0
## ENSMUSG00000000194 0 0
## ENSMUSG00000000244 0 0
## ENSMUSG00000000088 0 1
## ENSMUSG00000000126 0 0
## ENSMUSG00000000031 0 0
## SRR5164441_AAAACGGAAACT SRR5164441_AAAACTACAACT
## ENSMUSG00000000028 0 0
## ENSMUSG00000000037 0 0
## ENSMUSG00000000159 14 0
## ENSMUSG00000000149 0 0
## ENSMUSG00000000223 0 0
## ENSMUSG00000000167 0 0
## ENSMUSG00000000142 0 2
## ENSMUSG00000000127 0 0
## ENSMUSG00000000085 0 0
## ENSMUSG00000000202 0 0
## ENSMUSG00000000093 0 0
## ENSMUSG00000000171 0 0
## ENSMUSG00000000120 0 0
## ENSMUSG00000000134 0 0
## ENSMUSG00000000125 0 0
## ENSMUSG00000000214 0 0
## ENSMUSG00000000058 0 0
## ENSMUSG00000000168 0 0
## ENSMUSG00000000184 0 0
## ENSMUSG00000000056 0 0
## ENSMUSG00000000049 0 0
## ENSMUSG00000000197 1 0
## ENSMUSG00000000148 0 0
## ENSMUSG00000000078 0 0
## ENSMUSG00000000001 0 0
## ENSMUSG00000000247 0 0
## ENSMUSG00000000131 0 0
## ENSMUSG00000000194 0 0
## ENSMUSG00000000244 0 0
## ENSMUSG00000000088 0 0
## ENSMUSG00000000126 0 0
## ENSMUSG00000000031 0 0
## SRR5164441_AAAACTGGTTAT
## ENSMUSG00000000028 0
## ENSMUSG00000000037 0
## ENSMUSG00000000159 0
## ENSMUSG00000000149 0
## ENSMUSG00000000223 0
## ENSMUSG00000000167 0
## ENSMUSG00000000142 0
## ENSMUSG00000000127 0
## ENSMUSG00000000085 0
## ENSMUSG00000000202 0
## ENSMUSG00000000093 0
## ENSMUSG00000000171 0
## ENSMUSG00000000120 0
## ENSMUSG00000000134 0
## ENSMUSG00000000125 0
## ENSMUSG00000000214 0
## ENSMUSG00000000058 0
## ENSMUSG00000000168 0
## ENSMUSG00000000184 0
## ENSMUSG00000000056 0
## ENSMUSG00000000049 0
## ENSMUSG00000000197 0
## ENSMUSG00000000148 0
## ENSMUSG00000000078 0
## ENSMUSG00000000001 0
## ENSMUSG00000000247 0
## ENSMUSG00000000131 0
## ENSMUSG00000000194 0
## ENSMUSG00000000244 0
## ENSMUSG00000000088 0
## ENSMUSG00000000126 0
## ENSMUSG00000000031 0
## Dimensions (genes x cells) of the raw count matrix restricted to mito_genes_present
dim(seurat_campbell_fasted@raw.data[mito_genes_present, ])
## [1] 32 3783
## Per-cell share of raw counts that come from the selected mitochondrial genes.
## Despite its name, percent_mito is a 0-1 fraction (nothing is multiplied by 100).
mito_counts_per_cell <- Matrix::colSums(
  seurat_campbell_fasted@raw.data[mito_genes_present, ]
)
total_counts_per_cell <- Matrix::colSums(seurat_campbell_fasted@raw.data)
percent_mito <- mito_counts_per_cell / total_counts_per_cell
## Summary statistics of the mitochondrial fraction across cells
summary(percent_mito)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0007047 0.0013918 0.0016627 0.0023381 0.0120724
## Store the mitochondrial fraction in the object's metadata as "percent_mito"
seurat_campbell_fasted <- AddMetaData(
  object = seurat_campbell_fasted,
  col.name = "percent_mito",
  metadata = percent_mito
)
## Inspect the first rows of the metadata table
head(seurat_campbell_fasted@meta.data)
## nGene nUMI orig.ident replicate_name percent_mito
## SRR5164441_AAAAAGGGATGC 1001 1632 SRR5164441 SRR5164441 0.0006127451
## SRR5164441_AAAAATTCGGGC 1789 3300 SRR5164441 SRR5164441 0.0015165302
## SRR5164441_AAAACGGAAACT 1833 3370 SRR5164441 SRR5164441 0.0044589774
## SRR5164441_AAAACTACAACT 1653 2753 SRR5164441 SRR5164441 0.0007278020
## SRR5164441_AAAACTGGTTAT 834 1639 SRR5164441 SRR5164441 0.0000000000
## SRR5164441_AAAACTTCTACA 1334 2115 SRR5164441 SRR5164441 0.0033112583
## Violin plots of gene count, UMI count and mitochondrial fraction per cell
VlnPlot(
  object = seurat_campbell_fasted,
  features.plot = c("nGene", "nUMI", "percent_mito"),
  nCol = 3,
  point.size.use = 0.2,
  x.lab.rot = TRUE
)
## Scatter plots of nUMI against the mitochondrial fraction and against nGene,
## drawn side by side
par(mfrow = c(1, 2))
# GenePlot(object = seurat_campbell_fasted, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_fasted, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  object = seurat_campbell_fasted,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  cex.use = 0.5,
  pch.use = 16
)
GenePlot(
  object = seurat_campbell_fasted,
  gene1 = "nUMI",
  gene2 = "nGene",
  cex.use = 0.5,
  pch.use = 16
)
Filter out cells with more than 0.5% of total gene expression coming from mitochondrially encoded genes, or with more than 4000 genes expressed.
## Manual count of cells passing both upper thresholds
## (author's note: all cells already have >800 genes)
passes_mito_cutoff <- seurat_campbell_fasted@meta.data$percent_mito < 0.005
passes_gene_cutoff <- seurat_campbell_fasted@meta.data$nGene < 4000
table(passes_mito_cutoff & passes_gene_cutoff)
##
## FALSE TRUE
## 143 3640
# FALSE TRUE
# 143 3640
## Apply the QC thresholds: nGene between 800 and 4000, percent_mito below 0.005
qc_variables <- c("nGene", "percent_mito")
qc_lower_bounds <- c(800, -Inf)
qc_upper_bounds <- c(4000, 0.005)
seurat_campbell_fasted <- FilterCells(
  object = seurat_campbell_fasted,
  subset.names = qc_variables,
  low.thresholds = qc_lower_bounds,
  high.thresholds = qc_upper_bounds
)
seurat_campbell_fasted
## An object of class seurat in project CAMPBELL_FASTED
## 21789 genes across 3640 samples.
# An object of class seurat in project CAMPBELL_FASTED
# 21789 genes across 3640 samples.
## Histogram of per-cell total expression before normalisation.
## Matrix::colSums is used for both histograms (as in the percent_mito
## calculation) so the column sums are taken directly on the expression matrix
## instead of first converting it to a dense data frame.
hist(Matrix::colSums(seurat_campbell_fasted@data),
     breaks = 100,
     main = "Total expression before normalisation",
     xlab = "Sum of expression")
## Log-normalise gene expression per cell with a scale factor of 10000
seurat_campbell_fasted <- NormalizeData(object = seurat_campbell_fasted,
                                        normalization.method = "LogNormalize",
                                        scale.factor = 10000)
## Histogram of per-cell total expression after normalisation
hist(Matrix::colSums(seurat_campbell_fasted@data),
     breaks = 100,
     main = "Total expression after normalisation",
     xlab = "Sum of expression")
Find genes whose expression varies between cells; these will be used to construct the principal components on which the cells are clustered.
## Select variable genes using mean-expression (x) and dispersion (y) cut-offs
seurat_campbell_fasted <- FindVariableGenes(
  object = seurat_campbell_fasted,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  binning.method = "equal_width",
  num.bin = 20,
  x.low.cutoff = 0.05,
  x.high.cutoff = 3,
  y.cutoff = 0.75
)
# how many genes were selected
length(seurat_campbell_fasted@var.genes)
## [1] 1379
seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )
== 1384 variable genes
seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )
== 2356 variable genes
seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )
== 1848 variable genes
seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )
== 3006 variable genes
seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 40, binning.method = "equal_width" )
== 2870 variable genes
seurat_campbell_fasted <- FindVariableGenes(object = seurat_campbell_fasted, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )
== 3006 variable genes
Scale gene expression per cell by building linear models for nUMI, percent_mito and mouse replicate.
## Scale the data while regressing out nUMI, percent_mito and replicate_name
seurat_campbell_fasted <- ScaleData(
  object = seurat_campbell_fasted,
  vars.to.regress = c("nUMI", "percent_mito", "replicate_name")
)
## Regressing out: nUMI, percent_mito, replicate_name
##
## Time Elapsed: 29.1311025619507 secs
## Scaling data matrix
Principal component analysis of variable genes for use in cell clustering.
## PCA on the variable genes; print 5 genes for each of the first 5 PCs
seurat_campbell_fasted <- RunPCA(
  object = seurat_campbell_fasted,
  pc.genes = seurat_campbell_fasted@var.genes,
  genes.print = 5,
  pcs.print = 1:5,
  do.print = TRUE
)
## [1] "PC1"
## [1] "ENSMUSG00000026385" "ENSMUSG00000018593" "ENSMUSG00000031765"
## [4] "ENSMUSG00000050953" "ENSMUSG00000000567"
## [1] ""
## [1] "ENSMUSG00000027350" "ENSMUSG00000021700" "ENSMUSG00000026787"
## [4] "ENSMUSG00000066392" "ENSMUSG00000027523"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000041323"
## [4] "ENSMUSG00000110332" "ENSMUSG00000020473"
## [1] ""
## [1] "ENSMUSG00000027375" "ENSMUSG00000079018" "ENSMUSG00000075602"
## [4] "ENSMUSG00000022584" "ENSMUSG00000024140"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000026830" "ENSMUSG00000036634"
## [4] "ENSMUSG00000076439" "ENSMUSG00000032517"
## [1] ""
## [1] "ENSMUSG00000079018" "ENSMUSG00000075602" "ENSMUSG00000040584"
## [4] "ENSMUSG00000022584" "ENSMUSG00000041378"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000022528" "ENSMUSG00000063564" "ENSMUSG00000007682"
## [4] "ENSMUSG00000028195" "ENSMUSG00000045005"
## [1] ""
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000094800"
## [4] "ENSMUSG00000038370" "ENSMUSG00000027800"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000029838" "ENSMUSG00000030428" "ENSMUSG00000055254"
## [4] "ENSMUSG00000045092" "ENSMUSG00000056492"
## [1] ""
## [1] "ENSMUSG00000038642" "ENSMUSG00000036905" "ENSMUSG00000036896"
## [4] "ENSMUSG00000024621" "ENSMUSG00000036887"
## [1] ""
## [1] ""
## Top genes associated with PCs 1 to 9
VizPCA(object = seurat_campbell_fasted, pcs.use = 1:9)
## Scatter of PC1 against PC2
PCAPlot(object = seurat_campbell_fasted, dim.1 = 1, dim.2 = 2)
## Scatter of PC2 against PC3
PCAPlot(object = seurat_campbell_fasted, dim.1 = 2, dim.2 = 3)
## Expression heatmap of the PC1 genes over 500 cells
PCHeatmap(
  object = seurat_campbell_fasted,
  pc.use = 1,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Expression heatmap of the PC2 genes over 500 cells
PCHeatmap(
  object = seurat_campbell_fasted,
  pc.use = 2,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Expression heatmap of the PC3 genes over 500 cells
PCHeatmap(
  object = seurat_campbell_fasted,
  pc.use = 3,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Expression heatmap of the PC4 genes over 500 cells
PCHeatmap(
  object = seurat_campbell_fasted,
  pc.use = 4,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Expression heatmaps for PCs 5 to 18 in one call
PCHeatmap(
  object = seurat_campbell_fasted,
  pc.use = 5:18,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)
## Project the PCA computed on the variable genes onto the entire dataset (all genes)
seurat_campbell_fasted <- ProjectPCA(
  object = seurat_campbell_fasted,
  do.print = TRUE
)
## [1] "PC1"
## [1] "ENSMUSG00000026385" "ENSMUSG00000002985" "ENSMUSG00000018593"
## [4] "ENSMUSG00000031765" "ENSMUSG00000050953" "ENSMUSG00000000567"
## [7] "ENSMUSG00000031762" "ENSMUSG00000026728" "ENSMUSG00000058135"
## [10] "ENSMUSG00000034467" "ENSMUSG00000070306" "ENSMUSG00000043164"
## [13] "ENSMUSG00000001025" "ENSMUSG00000035805" "ENSMUSG00000005360"
## [16] "ENSMUSG00000030342" "ENSMUSG00000094800" "ENSMUSG00000095538"
## [19] "ENSMUSG00000041323" "ENSMUSG00000017009" "ENSMUSG00000033208"
## [22] "ENSMUSG00000027800" "ENSMUSG00000019232" "ENSMUSG00000026649"
## [25] "ENSMUSG00000110332" "ENSMUSG00000026701" "ENSMUSG00000053931"
## [28] "ENSMUSG00000020473" "ENSMUSG00000024411" "ENSMUSG00000008540"
## [1] ""
## [1] "ENSMUSG00000021268" "ENSMUSG00000044349" "ENSMUSG00000019986"
## [4] "ENSMUSG00000050711" "ENSMUSG00000033061" "ENSMUSG00000040785"
## [7] "ENSMUSG00000055430" "ENSMUSG00000021087" "ENSMUSG00000026576"
## [10] "ENSMUSG00000043388" "ENSMUSG00000024268" "ENSMUSG00000064341"
## [13] "ENSMUSG00000027581" "ENSMUSG00000019923" "ENSMUSG00000047261"
## [16] "ENSMUSG00000027273" "ENSMUSG00000042750" "ENSMUSG00000002265"
## [19] "ENSMUSG00000107169" "ENSMUSG00000029223" "ENSMUSG00000027350"
## [22] "ENSMUSG00000033981" "ENSMUSG00000043384" "ENSMUSG00000025468"
## [25] "ENSMUSG00000024261" "ENSMUSG00000027500" "ENSMUSG00000024423"
## [28] "ENSMUSG00000060188" "ENSMUSG00000000159" "ENSMUSG00000025579"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000070306" "ENSMUSG00000021268" "ENSMUSG00000026223"
## [4] "ENSMUSG00000043164" "ENSMUSG00000019986" "ENSMUSG00000040785"
## [7] "ENSMUSG00000021270" "ENSMUSG00000055430" "ENSMUSG00000021087"
## [10] "ENSMUSG00000041323" "ENSMUSG00000110332" "ENSMUSG00000020473"
## [13] "ENSMUSG00000044772" "ENSMUSG00000044349" "ENSMUSG00000031428"
## [16] "ENSMUSG00000022037" "ENSMUSG00000027744" "ENSMUSG00000043384"
## [19] "ENSMUSG00000024033" "ENSMUSG00000036438" "ENSMUSG00000032595"
## [22] "ENSMUSG00000050711" "ENSMUSG00000033061" "ENSMUSG00000042750"
## [25] "ENSMUSG00000094800" "ENSMUSG00000034467" "ENSMUSG00000009281"
## [28] "ENSMUSG00000072674" "ENSMUSG00000026576" "ENSMUSG00000047394"
## [1] ""
## [1] "ENSMUSG00000027375" "ENSMUSG00000079018" "ENSMUSG00000075602"
## [4] "ENSMUSG00000022584" "ENSMUSG00000024140" "ENSMUSG00000040584"
## [7] "ENSMUSG00000041378" "ENSMUSG00000030237" "ENSMUSG00000056492"
## [10] "ENSMUSG00000030235" "ENSMUSG00000020154" "ENSMUSG00000029648"
## [13] "ENSMUSG00000033960" "ENSMUSG00000039167" "ENSMUSG00000027199"
## [16] "ENSMUSG00000020717" "ENSMUSG00000037625" "ENSMUSG00000022548"
## [19] "ENSMUSG00000026193" "ENSMUSG00000064373" "ENSMUSG00000026830"
## [22] "ENSMUSG00000027562" "ENSMUSG00000031425" "ENSMUSG00000036634"
## [25] "ENSMUSG00000076439" "ENSMUSG00000032517" "ENSMUSG00000020077"
## [28] "ENSMUSG00000031775" "ENSMUSG00000001946" "ENSMUSG00000015090"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000026830" "ENSMUSG00000036634"
## [4] "ENSMUSG00000076439" "ENSMUSG00000031425" "ENSMUSG00000032517"
## [7] "ENSMUSG00000050121" "ENSMUSG00000015090" "ENSMUSG00000032060"
## [10] "ENSMUSG00000006782" "ENSMUSG00000027562" "ENSMUSG00000090639"
## [13] "ENSMUSG00000041607" "ENSMUSG00000032854" "ENSMUSG00000022425"
## [16] "ENSMUSG00000020774" "ENSMUSG00000070354" "ENSMUSG00000022548"
## [19] "ENSMUSG00000033579" "ENSMUSG00000046160" "ENSMUSG00000040759"
## [22] "ENSMUSG00000027858" "ENSMUSG00000090996" "ENSMUSG00000073680"
## [25] "ENSMUSG00000047976" "ENSMUSG00000013523" "ENSMUSG00000020486"
## [28] "ENSMUSG00000027199" "ENSMUSG00000037166" "ENSMUSG00000039904"
## [1] ""
## [1] "ENSMUSG00000079018" "ENSMUSG00000075602" "ENSMUSG00000040584"
## [4] "ENSMUSG00000022584" "ENSMUSG00000041378" "ENSMUSG00000056492"
## [7] "ENSMUSG00000030237" "ENSMUSG00000020154" "ENSMUSG00000030235"
## [10] "ENSMUSG00000029648" "ENSMUSG00000039167" "ENSMUSG00000020717"
## [13] "ENSMUSG00000026193" "ENSMUSG00000033960" "ENSMUSG00000001946"
## [16] "ENSMUSG00000024140" "ENSMUSG00000034738" "ENSMUSG00000042745"
## [19] "ENSMUSG00000031239" "ENSMUSG00000020077" "ENSMUSG00000032232"
## [22] "ENSMUSG00000029802" "ENSMUSG00000019966" "ENSMUSG00000006386"
## [25] "ENSMUSG00000039349" "ENSMUSG00000036256" "ENSMUSG00000029484"
## [28] "ENSMUSG00000000530" "ENSMUSG00000026814" "ENSMUSG00000026921"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000022528" "ENSMUSG00000063564" "ENSMUSG00000007682"
## [4] "ENSMUSG00000028195" "ENSMUSG00000045005" "ENSMUSG00000017390"
## [7] "ENSMUSG00000047786" "ENSMUSG00000034640" "ENSMUSG00000029838"
## [10] "ENSMUSG00000022132" "ENSMUSG00000026701" "ENSMUSG00000056380"
## [13] "ENSMUSG00000021250" "ENSMUSG00000018451" "ENSMUSG00000035686"
## [16] "ENSMUSG00000027004" "ENSMUSG00000021732" "ENSMUSG00000033737"
## [19] "ENSMUSG00000044177" "ENSMUSG00000022419" "ENSMUSG00000027447"
## [22] "ENSMUSG00000030428" "ENSMUSG00000000247" "ENSMUSG00000045817"
## [25] "ENSMUSG00000000567" "ENSMUSG00000003545" "ENSMUSG00000049929"
## [28] "ENSMUSG00000005089" "ENSMUSG00000030905" "ENSMUSG00000058897"
## [1] ""
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000094800"
## [4] "ENSMUSG00000038370" "ENSMUSG00000027800" "ENSMUSG00000044772"
## [7] "ENSMUSG00000032595" "ENSMUSG00000072674" "ENSMUSG00000027744"
## [10] "ENSMUSG00000020473" "ENSMUSG00000110332" "ENSMUSG00000021270"
## [13] "ENSMUSG00000041323" "ENSMUSG00000026683" "ENSMUSG00000047139"
## [16] "ENSMUSG00000108841" "ENSMUSG00000021950" "ENSMUSG00000095304"
## [19] "ENSMUSG00000045655" "ENSMUSG00000044475" "ENSMUSG00000026301"
## [22] "ENSMUSG00000072473" "ENSMUSG00000022449" "ENSMUSG00000046242"
## [25] "ENSMUSG00000096054" "ENSMUSG00000047394" "ENSMUSG00000033208"
## [28] "ENSMUSG00000045954" "ENSMUSG00000091345" "ENSMUSG00000037926"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000018451" "ENSMUSG00000029838" "ENSMUSG00000101111"
## [4] "ENSMUSG00000068923" "ENSMUSG00000064341" "ENSMUSG00000065947"
## [7] "ENSMUSG00000040785" "ENSMUSG00000064370" "ENSMUSG00000031633"
## [10] "ENSMUSG00000020315" "ENSMUSG00000064356" "ENSMUSG00000064367"
## [13] "ENSMUSG00000030654" "ENSMUSG00000022892" "ENSMUSG00000021087"
## [16] "ENSMUSG00000055430" "ENSMUSG00000100862" "ENSMUSG00000030428"
## [19] "ENSMUSG00000055254" "ENSMUSG00000079037" "ENSMUSG00000043384"
## [22] "ENSMUSG00000052727" "ENSMUSG00000015222" "ENSMUSG00000045092"
## [25] "ENSMUSG00000056492" "ENSMUSG00000005125" "ENSMUSG00000032766"
## [28] "ENSMUSG00000034723" "ENSMUSG00000017390" "ENSMUSG00000063564"
## [1] ""
## [1] "ENSMUSG00000038642" "ENSMUSG00000036905" "ENSMUSG00000036896"
## [4] "ENSMUSG00000024621" "ENSMUSG00000036887" "ENSMUSG00000030579"
## [7] "ENSMUSG00000058715" "ENSMUSG00000021423" "ENSMUSG00000028581"
## [10] "ENSMUSG00000023992" "ENSMUSG00000046805" "ENSMUSG00000036353"
## [13] "ENSMUSG00000000982" "ENSMUSG00000089929" "ENSMUSG00000015852"
## [16] "ENSMUSG00000021665" "ENSMUSG00000024397" "ENSMUSG00000018008"
## [19] "ENSMUSG00000018930" "ENSMUSG00000026395" "ENSMUSG00000040552"
## [22] "ENSMUSG00000074622" "ENSMUSG00000069516" "ENSMUSG00000024401"
## [25] "ENSMUSG00000040229" "ENSMUSG00000022952" "ENSMUSG00000059498"
## [28] "ENSMUSG00000032691" "ENSMUSG00000030786" "ENSMUSG00000048163"
## [1] ""
## [1] ""
## JackStraw test (100 replicates) to assess which PCs are statistically significant
seurat_campbell_fasted <- JackStraw(
  object = seurat_campbell_fasted,
  display.progress = TRUE,
  num.replicate = 100
)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 20%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|==================== | 31%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 40%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
## Time Elapsed: 2.14932174285253 mins
# Maximum number of PCs allowed = 20.
## Plot the JackStraw results for PCs 1 to 20
JackStrawPlot(
  object = seurat_campbell_fasted,
  PCs = 1:20
)
## Warning: Removed 19616 rows containing missing values (geom_point).
## An object of class seurat in project CAMPBELL_FASTED
## 21789 genes across 3640 samples.
## The elbow plot is a cheaper heuristic for choosing how many PCs to keep
PCElbowPlot(object = seurat_campbell_fasted)
## Cluster the cells on PCs 1 to 20 at resolution 0.6
seurat_campbell_fasted <- FindClusters(
  object = seurat_campbell_fasted,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
PrintFindClustersParams(object = seurat_campbell_fasted)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 22:43:07
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## t-SNE embedding from PCs 1 to 20
seurat_campbell_fasted <- RunTSNE(
  object = seurat_campbell_fasted,
  dims.use = 1:20,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_fasted)
## Labelled t-SNE plot without a legend
TSNEPlot(
  object = seurat_campbell_fasted,
  do.label = TRUE,
  no.legend = TRUE
)
## 1379 variable genes = 16 clusters
## 1379 variable genes, 10 PC = 13 clusters
## 3006 variable genes, 20 PC = 16 clusters
## Number of cells in each cluster
table(seurat_campbell_fasted@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 1091 451 376 335 252 242 188 184 134 132 64 47 40 38 33
## 15
## 33
## Cross-tabulate cluster identity against replicate, then convert each
## replicate's column to proportions (margin 2) rounded to 5 decimals
cells_by_cluster_and_replicate <- table(
  seurat_campbell_fasted@ident,
  seurat_campbell_fasted@meta.data$replicate_name
)
proportion_table <- round(
  prop.table(cells_by_cluster_and_replicate, margin = 2),
  5
)
proportion_table
##
## SRR5164441 SRR5164444 SRR5164446
## 0 0.33306 0.21617 0.25075
## 1 0.09480 0.18629 0.17568
## 2 0.08940 0.10896 0.14865
## 3 0.11185 0.06854 0.04054
## 4 0.07401 0.06151 0.05856
## 5 0.07069 0.10018 0.02252
## 6 0.04865 0.08436 0.03453
## 7 0.04200 0.08084 0.05556
## 8 0.03035 0.01406 0.07958
## 9 0.03493 0.04042 0.03754
## 10 0.02412 0.00000 0.00901
## 11 0.01331 0.02109 0.00450
## 12 0.00457 0.00527 0.03904
## 13 0.01414 0.00703 0.00000
## 14 0.01081 0.00527 0.00601
## 15 0.00333 0.00000 0.03754
## t-SNE plot with cells grouped by replicate
TSNEPlot(
  object = seurat_campbell_fasted,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)
## Convert the cluster x replicate table to a wide data frame.
## as.data.frame.matrix() keeps the table's dimnames, so replicate names stay
## as column names and cluster ids as row names; there is no need to flatten
## through matrix() and rebuild the table just to recover the column names.
proportion_table <- as.data.frame.matrix(proportion_table)
## Pairwise correlation between replicates, using only the first five rows
## (the first five clusters in table order)
chart.Correlation(proportion_table[1:5, ])
#######
## Re-cluster on PCA dimensions 1-20 at resolution 1.0, then print the
## parameters recorded for the latest FindClusters run.
seurat_campbell_fasted <- FindClusters(
  object = seurat_campbell_fasted,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 1.0,
  force.recalc = TRUE,
  save.SNN = TRUE,
  print.output = 0
)
PrintFindClustersParams(object = seurat_campbell_fasted)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 22:43:07
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from dimensions 1-20 and plot it with cluster
## labels drawn on the plot instead of a legend.
seurat_campbell_fasted <- RunTSNE(
  object = seurat_campbell_fasted,
  dims.use = 1:20,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_fasted)
TSNEPlot(object = seurat_campbell_fasted, do.label = TRUE, no.legend = TRUE)
## 1379 variable genes = 19 clusters
## 1379 variable genes, 10 PC = 15 clusters
## 3006 variable genes, 20 PC = 18 clusters
# Number of cells assigned to each cluster
table(seurat_campbell_fasted@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 572 493 450 337 276 270 249 184 177 134 106 98 64 47 43 38 35 34
## 18
## 33
# Cluster-by-replicate counts converted to within-replicate proportions
# (margin = 2 normalises each replicate column), rounded to 5 decimals
proportion_table <- round(
  prop.table(
    table(seurat_campbell_fasted@ident,
          seurat_campbell_fasted@meta.data$replicate_name),
    margin = 2
  ),
  digits = 5
)
proportion_table
##
## SRR5164441 SRR5164444 SRR5164446
## 0 0.16674 0.10018 0.17117
## 1 0.15967 0.11072 0.06907
## 2 0.09439 0.18629 0.17568
## 3 0.11185 0.06854 0.04354
## 4 0.05904 0.10545 0.11111
## 5 0.07900 0.06503 0.06456
## 6 0.07277 0.10193 0.02402
## 7 0.04699 0.08436 0.03453
## 8 0.03992 0.07733 0.05556
## 9 0.03035 0.01406 0.07958
## 10 0.03243 0.00703 0.03604
## 11 0.02495 0.02636 0.03453
## 12 0.02412 0.00000 0.00901
## 13 0.01331 0.02109 0.00450
## 14 0.00582 0.00527 0.03904
## 15 0.01414 0.00703 0.00000
## 16 0.00998 0.01406 0.00450
## 17 0.01123 0.00527 0.00601
## 18 0.00333 0.00000 0.03754
## t-SNE plot with cells grouped by replicate
TSNEPlot(
  object = seurat_campbell_fasted,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)
## Convert the cluster x replicate table to a wide data frame.
## as.data.frame.matrix() keeps the table's dimnames, so replicate names stay
## as column names and cluster ids as row names; there is no need to flatten
## through matrix() and rebuild the table just to recover the column names.
proportion_table <- as.data.frame.matrix(proportion_table)
## Pairwise correlation between replicates, using only the first five rows
## (the first five clusters in table order)
chart.Correlation(proportion_table[1:5, ])
#######
## Re-cluster on PCA dimensions 1-20 at resolution 1.5, then print the
## parameters recorded for the latest FindClusters run.
seurat_campbell_fasted <- FindClusters(
  object = seurat_campbell_fasted,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 1.5,
  force.recalc = TRUE,
  save.SNN = TRUE,
  print.output = 0
)
PrintFindClustersParams(object = seurat_campbell_fasted)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 22:43:07
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from dimensions 1-20 and plot it with cluster
## labels drawn on the plot instead of a legend.
seurat_campbell_fasted <- RunTSNE(
  object = seurat_campbell_fasted,
  dims.use = 1:20,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_fasted)
TSNEPlot(object = seurat_campbell_fasted, do.label = TRUE, no.legend = TRUE)
## 1379 variable genes = 20 clusters
## 1379 variable genes, 10 PC = 19 clusters
## 3006 variable genes, 20 PC = 20 clusters
# Number of cells assigned to each cluster
table(seurat_campbell_fasted@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 490 464 449 337 271 269 252 189 184 134 106 102 99 64 47 43 38 35
## 18 19
## 34 33
# Cluster-by-replicate counts converted to within-replicate proportions
# (margin = 2 normalises each replicate column), rounded to 5 decimals
proportion_table <- round(
  prop.table(
    table(seurat_campbell_fasted@ident,
          seurat_campbell_fasted@meta.data$replicate_name),
    margin = 2
  ),
  digits = 5
)
proportion_table
##
## SRR5164441 SRR5164444 SRR5164446
## 0 0.15925 0.10721 0.06907
## 1 0.15551 0.07557 0.07057
## 2 0.09439 0.18453 0.17568
## 3 0.11185 0.06854 0.04354
## 4 0.07942 0.06503 0.06456
## 5 0.05696 0.10193 0.11111
## 6 0.07401 0.10193 0.02402
## 7 0.04823 0.08612 0.03604
## 8 0.04200 0.08084 0.05556
## 9 0.03035 0.01406 0.07958
## 10 0.03243 0.00703 0.03604
## 11 0.00832 0.02812 0.09910
## 12 0.02536 0.02636 0.03453
## 13 0.02412 0.00000 0.00901
## 14 0.01331 0.02109 0.00450
## 15 0.00582 0.00527 0.03904
## 16 0.01414 0.00703 0.00000
## 17 0.00998 0.01406 0.00450
## 18 0.01123 0.00527 0.00601
## 19 0.00333 0.00000 0.03754
## t-SNE plot with cells grouped by replicate
TSNEPlot(
  object = seurat_campbell_fasted,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)
## Convert the cluster x replicate table to a wide data frame.
## as.data.frame.matrix() keeps the table's dimnames, so replicate names stay
## as column names and cluster ids as row names; there is no need to flatten
## through matrix() and rebuild the table just to recover the column names.
proportion_table <- as.data.frame.matrix(proportion_table)
## Pairwise correlation between replicates, using only the first five rows
## (the first five clusters in table order)
chart.Correlation(proportion_table[1:5, ])
#######
## Re-cluster on PCA dimensions 1-20 at resolution 2.0, then print the
## parameters recorded for the latest FindClusters run.
seurat_campbell_fasted <- FindClusters(
  object = seurat_campbell_fasted,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 2.0,
  force.recalc = TRUE,
  save.SNN = TRUE,
  print.output = 0
)
PrintFindClustersParams(object = seurat_campbell_fasted)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 22:43:07
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## Compute the t-SNE embedding from dimensions 1-20 and plot it with cluster
## labels drawn on the plot instead of a legend.
seurat_campbell_fasted <- RunTSNE(
  object = seurat_campbell_fasted,
  dims.use = 1:20,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_fasted)
TSNEPlot(object = seurat_campbell_fasted, do.label = TRUE, no.legend = TRUE)
## 1379 variable genes = 23 clusters
## 1379 variable genes, 10 PC = 21 clusters
## 3006 variable genes, 20 PC = 21 clusters
# Number of cells assigned to each cluster
table(seurat_campbell_fasted@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 476 431 258 253 251 249 232 219 189 177 134 125 119 97 94 64 47 45
## 18 19 20 21 22
## 40 38 35 34 33
# Cluster-by-replicate counts converted to within-replicate proportions
# (margin = 2 normalises each replicate column), rounded to 5 decimals
proportion_table <- round(
  prop.table(
    table(seurat_campbell_fasted@ident,
          seurat_campbell_fasted@meta.data$replicate_name),
    margin = 2
  ),
  digits = 5
)
proportion_table
##
## SRR5164441 SRR5164444 SRR5164446
## 0 0.15593 0.10369 0.06306
## 1 0.14761 0.06327 0.06006
## 2 0.05447 0.10193 0.10360
## 3 0.07443 0.10193 0.02402
## 4 0.07110 0.06678 0.06306
## 5 0.08690 0.03339 0.03153
## 6 0.08690 0.02460 0.01351
## 7 0.00748 0.16169 0.16366
## 8 0.04823 0.08612 0.03604
## 9 0.03992 0.07733 0.05556
## 10 0.03035 0.01406 0.07958
## 11 0.03701 0.01054 0.04505
## 12 0.01247 0.03339 0.10511
## 13 0.02495 0.02636 0.03303
## 14 0.02661 0.03691 0.01351
## 15 0.02412 0.00000 0.00901
## 16 0.01331 0.02109 0.00450
## 17 0.00624 0.00527 0.04054
## 18 0.01331 0.00527 0.00751
## 19 0.01414 0.00703 0.00000
## 20 0.00998 0.01406 0.00450
## 21 0.01123 0.00527 0.00601
## 22 0.00333 0.00000 0.03754
## t-SNE plot with cells grouped by replicate
TSNEPlot(
  object = seurat_campbell_fasted,
  group.by = "replicate_name",
  no.legend = FALSE,
  do.label = FALSE,
  pt.size = 0.4
)
## Convert the cluster x replicate table to a wide data frame.
## as.data.frame.matrix() keeps the table's dimnames, so replicate names stay
## as column names and cluster ids as row names; there is no need to flatten
## through matrix() and rebuild the table just to recover the column names.
proportion_table <- as.data.frame.matrix(proportion_table)
## Pairwise correlation between replicates, using only the first five rows
## (the first five clusters in table order)
chart.Correlation(proportion_table[1:5, ])
# cor(proportion_table[1],proportion_table[4])
## Re-cluster with the final parameters (PCA dimensions 1-20, resolution = 0.6)
## NOTE(review): the original comment cited 1817 variable genes, while the text
## that follows cites 1379 -- confirm which gene set was actually used.
seurat_campbell_fasted <- FindClusters(
  object = seurat_campbell_fasted,
  reduction.type = "pca",
  dims.use = 1:20,
  resolution = 0.6,
  force.recalc = TRUE,
  save.SNN = TRUE,
  print.output = 0
)
## Recompute the t-SNE embedding for the final clustering
seurat_campbell_fasted <- RunTSNE(
  object = seurat_campbell_fasted,
  dims.use = 1:20,
  do.fast = TRUE
)
Use 1379 variable genes and 20 principal components with a resolution of 0.6. This gives a total of 16 clusters.
## Differential expression of each cluster against all remaining cells.
## only.pos = FALSE keeps both positive and negative markers.
seurat_campbell_fasted_biomarkers <- FindAllMarkers(
  object = seurat_campbell_fasted,
  min.pct = 0.2,
  only.pos = FALSE
)
## Top 10 markers per cluster, ranked by avg_logFC
top10_seurat_campbell_markers <- top_n(
  group_by(seurat_campbell_fasted_biomarkers, cluster),
  n = 10,
  wt = avg_logFC
)
top10_seurat_campbell_markers
## # A tibble: 160 x 7
## # Groups: cluster [16]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 5.87e-131 0.628 0.976 0.795 1.28e-126 0 ENSMUSG00000055430
## 2 5.43e- 86 0.787 0.7 0.37 1.18e- 81 0 ENSMUSG00000035864
## 3 3.11e- 55 0.645 0.423 0.179 6.77e- 51 0 ENSMUSG00000054459
## 4 7.29e- 54 0.613 0.602 0.353 1.59e- 49 0 ENSMUSG00000021700
## 5 1.01e- 53 0.605 0.611 0.364 2.20e- 49 0 ENSMUSG00000066392
## 6 4.19e- 51 0.698 0.384 0.16 9.12e- 47 0 ENSMUSG00000010803
## 7 4.83e- 51 0.836 0.555 0.341 1.05e- 46 0 ENSMUSG00000026787
## 8 6.40e- 50 0.676 0.313 0.113 1.39e- 45 0 ENSMUSG00000037771
## 9 2.58e- 49 0.630 0.511 0.276 5.62e- 45 0 ENSMUSG00000048978
## 10 4.72e- 41 0.770 0.239 0.082 1.03e- 36 0 ENSMUSG00000036357
## # ... with 150 more rows
# write.csv(as.data.frame(seurat_campbell_fasted_biomarkers), file = "seurat_campbell_fasted_biomarkers.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers), file = "top10_seurat_campbell_fasted_biomarkers.csv", quote = FALSE)
## Repeat the marker search with the ROC test (test.use = "roc").
## This can take a long time.
seurat_campbell_fasted_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_fasted,
  test.use = "roc",
  min.pct = 0.2,
  only.pos = FALSE
)
## Top 10 ROC markers per cluster, ranked by avg_logFC
top10_seurat_campbell_markers_ROC <- top_n(
  group_by(seurat_campbell_fasted_biomarkers_ROC, cluster),
  n = 10,
  wt = avg_logFC
)
top10_seurat_campbell_markers_ROC
## # A tibble: 142 x 9
## # Groups: cluster [16]
## myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0.754 0.628 0.508 0.628 0.976 0.795 NA 0 ENSMUSG00…
## 2 0.728 0.515 0.456 0.515 0.99 0.825 NA 0 ENSMUSG00…
## 3 0.994 4.12 0.988 4.12 0.991 0.105 NA 1 ENSMUSG00…
## 4 0.988 4.86 0.976 4.86 0.98 0.143 NA 1 ENSMUSG00…
## 5 0.845 0.995 0.69 0.995 0.976 0.761 NA 1 ENSMUSG00…
## 6 0.818 1.63 0.636 1.63 0.725 0.137 NA 1 ENSMUSG00…
## 7 0.772 1.66 0.544 1.66 0.561 0.023 NA 1 ENSMUSG00…
## 8 0.736 1.30 0.472 1.30 0.563 0.12 NA 1 ENSMUSG00…
## 9 0.732 1.35 0.464 1.35 0.499 0.041 NA 1 ENSMUSG00…
## 10 0.725 1.40 0.450 1.40 0.49 0.049 NA 1 ENSMUSG00…
## # ... with 132 more rows
# write.csv(as.data.frame(seurat_campbell_fasted_biomarkers_ROC), file = "seurat_campbell_fasted_biomarkers_ROC.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers_ROC), file = "top10_seurat_campbell_fasted_biomarkers_ROC.csv", quote = FALSE)
## Heatmap of the top 10 marker genes selected per cluster
DoHeatmap(
  object = seurat_campbell_fasted,
  genes.use = top10_seurat_campbell_markers$gene,
  remove.key = TRUE,
  slim.col.label = TRUE
)
## save seurat object as .rds
# saveRDS(seurat_campbell_fasted, file = "./seurat_campbell_fasted_final.rds")
## Read the saved LFD Seurat object from disk and print its summary
seurat_campbell_lfd <- readRDS("./seurat_campbell_lfd_just_created.rds")
seurat_campbell_lfd
## An object of class seurat in project CAMPBELL_LFD
## 23273 genes across 3347 samples.
## Import the table of mouse mitochondrially encoded genes.
## IDs are read as character strings, not factors: indexing a matrix with a
## factor uses its integer codes, which silently selects arbitrary rows
## instead of the named genes.
mito_genes <- read.csv("mito_genes_table.csv", header = TRUE,
                       stringsAsFactors = FALSE)
## Character vector of mitochondrial gene IDs
## NOTE(review): assumes `id_with_url` holds bare gene IDs that match the
## row names of the count matrix -- confirm against the CSV.
mito_genes <- as.character(mito_genes$id_with_url)
## Keep only the mitochondrial gene IDs that are present in this dataset
mito_genes_present <- mito_genes[mito_genes %in% rownames(seurat_campbell_lfd@raw.data)]
## Fail loudly rather than compute a mitochondrial fraction of zero for every cell
stopifnot(length(mito_genes_present) > 0)
## NOTE(review): the recorded output that follows lists low-numbered Ensembl
## IDs (e.g. ENSMUSG00000000001) that do not look mitochondrial, consistent
## with factor-code indexing. Regenerate it, and re-check the percent_mito
## filter threshold used later, since the values will change.
seurat_campbell_lfd@raw.data[mito_genes_present, 1:5]
## AAAAAAGATACT AAAACGAGTACC AAAACTAAAACA AAAAGGTGGGTC
## ENSMUSG00000000028 0 0 0 0
## ENSMUSG00000000037 0 0 0 0
## ENSMUSG00000000149 0 0 0 0
## ENSMUSG00000000148 0 1 0 0
## ENSMUSG00000000202 0 0 0 0
## ENSMUSG00000000154 0 0 0 0
## ENSMUSG00000000134 0 0 0 0
## ENSMUSG00000000126 0 0 0 0
## ENSMUSG00000000085 0 0 0 0
## ENSMUSG00000000194 0 0 0 0
## ENSMUSG00000000093 0 0 0 0
## ENSMUSG00000000167 0 0 0 0
## ENSMUSG00000000094 0 0 0 0
## ENSMUSG00000000131 0 0 0 0
## ENSMUSG00000000120 0 0 0 0
## ENSMUSG00000000197 0 0 0 0
## ENSMUSG00000000058 0 0 0 0
## ENSMUSG00000000159 4 15 0 0
## ENSMUSG00000000168 0 0 0 0
## ENSMUSG00000000056 1 0 0 0
## ENSMUSG00000000049 0 0 0 0
## ENSMUSG00000000184 1 1 0 1
## ENSMUSG00000000142 0 0 0 0
## ENSMUSG00000000078 0 1 0 0
## ENSMUSG00000000001 0 0 0 1
## ENSMUSG00000000223 0 0 0 0
## ENSMUSG00000000127 0 0 0 0
## ENSMUSG00000000171 4 1 0 0
## ENSMUSG00000000214 0 0 0 0
## ENSMUSG00000000088 1 0 0 1
## ENSMUSG00000000125 0 0 0 0
## ENSMUSG00000000031 0 0 0 0
## AAAATCAGCTTC
## ENSMUSG00000000028 0
## ENSMUSG00000000037 0
## ENSMUSG00000000149 0
## ENSMUSG00000000148 0
## ENSMUSG00000000202 0
## ENSMUSG00000000154 0
## ENSMUSG00000000134 0
## ENSMUSG00000000126 0
## ENSMUSG00000000085 0
## ENSMUSG00000000194 0
## ENSMUSG00000000093 0
## ENSMUSG00000000167 0
## ENSMUSG00000000094 0
## ENSMUSG00000000131 0
## ENSMUSG00000000120 0
## ENSMUSG00000000197 0
## ENSMUSG00000000058 0
## ENSMUSG00000000159 4
## ENSMUSG00000000168 0
## ENSMUSG00000000056 0
## ENSMUSG00000000049 0
## ENSMUSG00000000184 0
## ENSMUSG00000000142 0
## ENSMUSG00000000078 0
## ENSMUSG00000000001 0
## ENSMUSG00000000223 0
## ENSMUSG00000000127 0
## ENSMUSG00000000171 1
## ENSMUSG00000000214 0
## ENSMUSG00000000088 0
## ENSMUSG00000000125 0
## ENSMUSG00000000031 0
# Dimensions of the mitochondrial-gene subset of the raw count matrix
dim(seurat_campbell_lfd@raw.data[mito_genes_present, ])
## [1] 32 3347
# Per-cell share of raw counts coming from the selected mitochondrial genes.
# Despite the name this is a proportion (0-1), not a percentage.
percent_mito <- (
  Matrix::colSums(seurat_campbell_lfd@raw.data[mito_genes_present, ]) /
    Matrix::colSums(seurat_campbell_lfd@raw.data)
)
# Summary statistics of the per-cell mitochondrial proportion
summary(percent_mito)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0007315 0.0012928 0.0015295 0.0020677 0.0076620
## Store the per-cell mitochondrial proportion in the object's metadata
seurat_campbell_lfd <- AddMetaData(
  object = seurat_campbell_lfd,
  col.name = "percent_mito",
  metadata = percent_mito
)
## Inspect the first rows of the metadata
head(seurat_campbell_lfd@meta.data)
## nGene nUMI orig.ident replicate_name percent_mito
## AAAAAAGATACT 2806 5638 CAMPBELL_LFD SRR5164439 0.0019517388
## AAAACGAGTACC 3030 7629 CAMPBELL_LFD SRR5164439 0.0024924570
## AAAACTAAAACA 964 1996 CAMPBELL_LFD SRR5164439 0.0000000000
## AAAAGGTGGGTC 2747 6120 CAMPBELL_LFD SRR5164439 0.0004903563
## AAAATCAGCTTC 2886 5655 CAMPBELL_LFD SRR5164439 0.0008849558
## AAAATGAGACGG 1236 1768 CAMPBELL_LFD SRR5164439 0.0011312217
## QC violin plots: genes per cell, UMIs per cell and mitochondrial proportion
VlnPlot(
  object = seurat_campbell_lfd,
  features.plot = c("nGene", "nUMI", "percent_mito"),
  nCol = 3,
  point.size.use = 0.2,
  x.lab.rot = TRUE
)
## QC scatter plots of nUMI against mitochondrial proportion and against the
## number of genes, drawn side by side (1 row x 2 columns).
par(mfrow = c(1, 2))
# GenePlot(object = seurat_campbell_lfd, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_lfd, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  object = seurat_campbell_lfd,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  cex.use = 0.5,
  pch.use = 16
)
GenePlot(
  object = seurat_campbell_lfd,
  gene1 = "nUMI",
  gene2 = "nGene",
  cex.use = 0.5,
  pch.use = 16
)
Filter out cells with more than 0.4% of total gene expression coming from mitochondrially encoded genes, or with more than 5000 genes expressed.
## Manual check of how many cells pass both upper thresholds
## (all cells are already known to have >800 genes)
with(
  seurat_campbell_lfd@meta.data,
  table(percent_mito < 0.004 & nGene < 5000)
)
##
## FALSE TRUE
## 213 3134
# FALSE TRUE
# 213 3134
## Keep cells within the thresholds: nGene between 800 and 5000, and
## percent_mito up to 0.004 (0.4%) with no lower bound.
seurat_campbell_lfd <- FilterCells(
  object = seurat_campbell_lfd,
  subset.names = c("nGene", "percent_mito"),
  high.thresholds = c(5000, 0.004),
  low.thresholds = c(800, -Inf)
)
seurat_campbell_lfd
## An object of class seurat in project CAMPBELL_LFD
## 23273 genes across 3134 samples.
# An object of class seurat in project CAMPBELL_FASTED
# 21789 genes across 3640 samples.
## Histogram of total expression per cell before normalisation.
## Matrix::colSums() is used for both histograms (as in the percent_mito
## calculation) so the expression matrix is summed directly instead of being
## expanded into a dense data frame first.
hist(Matrix::colSums(seurat_campbell_lfd@data),
     breaks = 100,
     main = "Total expression before normalisation",
     xlab = "Sum of expression")
## Normalise gene expression per cell (log-normalisation, scale factor 10000)
seurat_campbell_lfd <- NormalizeData(object = seurat_campbell_lfd,
                                     normalization.method = "LogNormalize",
                                     scale.factor = 10000)
## Histogram of total expression per cell after normalisation
hist(Matrix::colSums(seurat_campbell_lfd@data),
     breaks = 100,
     main = "Total expression after normalisation",
     xlab = "Sum of expression")
Find genes whose expression varies between cells; these are used to construct the principal components on which the cells are clustered.
## Select variable genes from the mean/dispersion relationship, using the
## cutoffs chosen for this dataset.
seurat_campbell_lfd <- FindVariableGenes(
  object = seurat_campbell_lfd,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  binning.method = "equal_width",
  num.bin = 20,
  x.low.cutoff = 0.05,
  x.high.cutoff = 3,
  y.cutoff = 0.75
)
# How many variable genes were selected
length(seurat_campbell_lfd@var.genes)
## [1] 1480
seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )
== 1484 variable genes
seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )
== 2420 variable genes
seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )
== 1986 variable genes
seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )
== 3189 variable genes
seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 40, binning.method = "equal_width" )
== 3122 variable genes
seurat_campbell_lfd <- FindVariableGenes(object = seurat_campbell_lfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 3, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )
== 1480 variable genes
Scale gene expression per cell by building linear models for nUMI and percent_mito (mouse replicate is not included in the variables regressed out in the code below).
## Scale the data, regressing out nUMI and percent_mito.
## NOTE(review): the original comment also mentioned mouse replicate, but it is
## not listed in vars.to.regress -- confirm whether that omission is intended.
seurat_campbell_lfd <- ScaleData(
  object = seurat_campbell_lfd,
  vars.to.regress = c("nUMI", "percent_mito")
)
## Regressing out: nUMI, percent_mito
##
## Time Elapsed: 29.3174724578857 secs
## Scaling data matrix
Principal component analysis of variable genes for use in cell clustering.
## PCA on the variable genes; print 5 genes for each of PCs 1-5
seurat_campbell_lfd <- RunPCA(
  object = seurat_campbell_lfd,
  pc.genes = seurat_campbell_lfd@var.genes,
  do.print = TRUE,
  genes.print = 5,
  pcs.print = 1:5
)
## [1] "PC1"
## [1] "ENSMUSG00000050711" "ENSMUSG00000022577" "ENSMUSG00000043388"
## [4] "ENSMUSG00000036699" "ENSMUSG00000044349"
## [1] ""
## [1] "ENSMUSG00000031765" "ENSMUSG00000031762" "ENSMUSG00000026701"
## [4] "ENSMUSG00000001025" "ENSMUSG00000000567"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000037852" "ENSMUSG00000055254" "ENSMUSG00000026701"
## [4] "ENSMUSG00000017390" "ENSMUSG00000035805"
## [1] ""
## [1] "ENSMUSG00000020077" "ENSMUSG00000079018" "ENSMUSG00000075602"
## [4] "ENSMUSG00000041378" "ENSMUSG00000040584"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000058715" "ENSMUSG00000036896" "ENSMUSG00000036887"
## [4] "ENSMUSG00000036905" "ENSMUSG00000030579"
## [1] ""
## [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000076439"
## [4] "ENSMUSG00000026830" "ENSMUSG00000032854"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000079018" "ENSMUSG00000040584" "ENSMUSG00000041378"
## [4] "ENSMUSG00000056492" "ENSMUSG00000075602"
## [1] ""
## [1] "ENSMUSG00000036896" "ENSMUSG00000036905" "ENSMUSG00000036887"
## [4] "ENSMUSG00000058715" "ENSMUSG00000024621"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000026830" "ENSMUSG00000050121" "ENSMUSG00000015090"
## [4] "ENSMUSG00000076439" "ENSMUSG00000027375"
## [1] ""
## [1] "ENSMUSG00000031610" "ENSMUSG00000026249" "ENSMUSG00000026424"
## [4] "ENSMUSG00000029231" "ENSMUSG00000019874"
## [1] ""
## [1] ""
## Top genes associated with principal components 1-9
VizPCA(object = seurat_campbell_lfd, pcs.use = 1:9)
## Scatter plot of PC1 against PC2
PCAPlot(object = seurat_campbell_lfd, dim.1 = 1, dim.2 = 2)
## Scatter plot of PC2 against PC3
PCAPlot(object = seurat_campbell_lfd, dim.1 = 2, dim.2 = 3)
## Heatmap of gene expression for the genes of principal component 1
PCHeatmap(
  object = seurat_campbell_lfd,
  pc.use = 1,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heatmap of gene expression for the genes of principal component 2
PCHeatmap(
  object = seurat_campbell_lfd,
  pc.use = 2,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heatmap of gene expression for the genes of principal component 3
PCHeatmap(
  object = seurat_campbell_lfd,
  pc.use = 3,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heatmap of gene expression for the genes of principal component 4
PCHeatmap(
  object = seurat_campbell_lfd,
  pc.use = 4,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heatmaps of gene expression for principal components 5-18
PCHeatmap(
  object = seurat_campbell_lfd,
  pc.use = 5:18,
  cells.use = 500,
  label.columns = FALSE,
  do.balanced = TRUE
)
## Project the pre-computed PCA (built from the variable genes identified
## earlier) onto the entire dataset (all genes)
seurat_campbell_lfd <- ProjectPCA(
  object = seurat_campbell_lfd,
  do.print = TRUE
)
## [1] "PC1"
## [1] "ENSMUSG00000033061" "ENSMUSG00000047261" "ENSMUSG00000027581"
## [4] "ENSMUSG00000050711" "ENSMUSG00000025468" "ENSMUSG00000024268"
## [7] "ENSMUSG00000022577" "ENSMUSG00000043388" "ENSMUSG00000055430"
## [10] "ENSMUSG00000036699" "ENSMUSG00000021087" "ENSMUSG00000029223"
## [13] "ENSMUSG00000019986" "ENSMUSG00000044349" "ENSMUSG00000026576"
## [16] "ENSMUSG00000027350" "ENSMUSG00000027500" "ENSMUSG00000019923"
## [19] "ENSMUSG00000035964" "ENSMUSG00000059361" "ENSMUSG00000042750"
## [22] "ENSMUSG00000031840" "ENSMUSG00000018965" "ENSMUSG00000060188"
## [25] "ENSMUSG00000071658" "ENSMUSG00000027273" "ENSMUSG00000066705"
## [28] "ENSMUSG00000040785" "ENSMUSG00000022658" "ENSMUSG00000020297"
## [1] ""
## [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
## [4] "ENSMUSG00000031762" "ENSMUSG00000026701" "ENSMUSG00000001025"
## [7] "ENSMUSG00000000567" "ENSMUSG00000026728" "ENSMUSG00000067786"
## [10] "ENSMUSG00000035805" "ENSMUSG00000058135" "ENSMUSG00000022528"
## [13] "ENSMUSG00000029838" "ENSMUSG00000050953" "ENSMUSG00000018593"
## [16] "ENSMUSG00000005360" "ENSMUSG00000095538" "ENSMUSG00000055254"
## [19] "ENSMUSG00000027712" "ENSMUSG00000044080" "ENSMUSG00000018102"
## [22] "ENSMUSG00000008540" "ENSMUSG00000027447" "ENSMUSG00000032231"
## [25] "ENSMUSG00000036570" "ENSMUSG00000034467" "ENSMUSG00000063564"
## [28] "ENSMUSG00000031342" "ENSMUSG00000008575" "ENSMUSG00000017390"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000018451" "ENSMUSG00000037852" "ENSMUSG00000067786"
## [4] "ENSMUSG00000026223" "ENSMUSG00000055254" "ENSMUSG00000101111"
## [7] "ENSMUSG00000052727" "ENSMUSG00000039278" "ENSMUSG00000031760"
## [10] "ENSMUSG00000026701" "ENSMUSG00000058254" "ENSMUSG00000031428"
## [13] "ENSMUSG00000064370" "ENSMUSG00000017390" "ENSMUSG00000021270"
## [16] "ENSMUSG00000046432" "ENSMUSG00000116358" "ENSMUSG00000021268"
## [19] "ENSMUSG00000035805" "ENSMUSG00000021379" "ENSMUSG00000032324"
## [22] "ENSMUSG00000025666" "ENSMUSG00000001025" "ENSMUSG00000006373"
## [25] "ENSMUSG00000050071" "ENSMUSG00000079037" "ENSMUSG00000000567"
## [28] "ENSMUSG00000019505" "ENSMUSG00000044550" "ENSMUSG00000030647"
## [1] ""
## [1] "ENSMUSG00000020077" "ENSMUSG00000079018" "ENSMUSG00000075602"
## [4] "ENSMUSG00000041378" "ENSMUSG00000040584" "ENSMUSG00000036896"
## [7] "ENSMUSG00000022584" "ENSMUSG00000016494" "ENSMUSG00000058715"
## [10] "ENSMUSG00000036887" "ENSMUSG00000036905" "ENSMUSG00000056492"
## [13] "ENSMUSG00000030235" "ENSMUSG00000028581" "ENSMUSG00000029484"
## [16] "ENSMUSG00000030237" "ENSMUSG00000024621" "ENSMUSG00000030579"
## [19] "ENSMUSG00000021423" "ENSMUSG00000020154" "ENSMUSG00000023992"
## [22] "ENSMUSG00000024397" "ENSMUSG00000033960" "ENSMUSG00000029622"
## [25] "ENSMUSG00000036256" "ENSMUSG00000038642" "ENSMUSG00000001123"
## [28] "ENSMUSG00000015852" "ENSMUSG00000000530" "ENSMUSG00000060802"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000058715" "ENSMUSG00000036896" "ENSMUSG00000036887"
## [4] "ENSMUSG00000036905" "ENSMUSG00000030579" "ENSMUSG00000028581"
## [7] "ENSMUSG00000023992" "ENSMUSG00000024621" "ENSMUSG00000021423"
## [10] "ENSMUSG00000024397" "ENSMUSG00000015852" "ENSMUSG00000038642"
## [13] "ENSMUSG00000046805" "ENSMUSG00000048163" "ENSMUSG00000027447"
## [16] "ENSMUSG00000036353" "ENSMUSG00000000682" "ENSMUSG00000059498"
## [19] "ENSMUSG00000040229" "ENSMUSG00000030786" "ENSMUSG00000052160"
## [22] "ENSMUSG00000040747" "ENSMUSG00000054675" "ENSMUSG00000030844"
## [25] "ENSMUSG00000018008" "ENSMUSG00000020377" "ENSMUSG00000026126"
## [28] "ENSMUSG00000044811" "ENSMUSG00000069516" "ENSMUSG00000018451"
## [1] ""
## [1] "ENSMUSG00000037625" "ENSMUSG00000031425" "ENSMUSG00000076439"
## [4] "ENSMUSG00000026830" "ENSMUSG00000032854" "ENSMUSG00000027375"
## [7] "ENSMUSG00000036634" "ENSMUSG00000050121" "ENSMUSG00000032517"
## [10] "ENSMUSG00000031775" "ENSMUSG00000033579" "ENSMUSG00000032060"
## [13] "ENSMUSG00000027562" "ENSMUSG00000046160" "ENSMUSG00000073680"
## [16] "ENSMUSG00000022425" "ENSMUSG00000041607" "ENSMUSG00000037166"
## [19] "ENSMUSG00000006782" "ENSMUSG00000015090" "ENSMUSG00000027858"
## [22] "ENSMUSG00000020486" "ENSMUSG00000040759" "ENSMUSG00000013523"
## [25] "ENSMUSG00000027199" "ENSMUSG00000026888" "ENSMUSG00000022090"
## [28] "ENSMUSG00000043448" "ENSMUSG00000020774" "ENSMUSG00000090996"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000079018" "ENSMUSG00000040584" "ENSMUSG00000041378"
## [4] "ENSMUSG00000056492" "ENSMUSG00000075602" "ENSMUSG00000030235"
## [7] "ENSMUSG00000022584" "ENSMUSG00000030237" "ENSMUSG00000020154"
## [10] "ENSMUSG00000033960" "ENSMUSG00000001946" "ENSMUSG00000029648"
## [13] "ENSMUSG00000036256" "ENSMUSG00000031239" "ENSMUSG00000026193"
## [16] "ENSMUSG00000039167" "ENSMUSG00000025902" "ENSMUSG00000029086"
## [19] "ENSMUSG00000042116" "ENSMUSG00000028776" "ENSMUSG00000034738"
## [22] "ENSMUSG00000061353" "ENSMUSG00000039349" "ENSMUSG00000030413"
## [25] "ENSMUSG00000000530" "ENSMUSG00000006386" "ENSMUSG00000020717"
## [28] "ENSMUSG00000027435" "ENSMUSG00000009687" "ENSMUSG00000045954"
## [1] ""
## [1] "ENSMUSG00000036896" "ENSMUSG00000036905" "ENSMUSG00000036887"
## [4] "ENSMUSG00000058715" "ENSMUSG00000024621" "ENSMUSG00000028581"
## [7] "ENSMUSG00000021423" "ENSMUSG00000023992" "ENSMUSG00000030579"
## [10] "ENSMUSG00000024397" "ENSMUSG00000038642" "ENSMUSG00000015852"
## [13] "ENSMUSG00000048163" "ENSMUSG00000046805" "ENSMUSG00000036353"
## [16] "ENSMUSG00000090639" "ENSMUSG00000052160" "ENSMUSG00000059498"
## [19] "ENSMUSG00000030786" "ENSMUSG00000018008" "ENSMUSG00000000682"
## [22] "ENSMUSG00000040747" "ENSMUSG00000040229" "ENSMUSG00000054675"
## [25] "ENSMUSG00000036908" "ENSMUSG00000020377" "ENSMUSG00000098112"
## [28] "ENSMUSG00000026126" "ENSMUSG00000044811" "ENSMUSG00000070354"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000024661" "ENSMUSG00000026830" "ENSMUSG00000050121"
## [4] "ENSMUSG00000015090" "ENSMUSG00000076439" "ENSMUSG00000027375"
## [7] "ENSMUSG00000022548" "ENSMUSG00000027562" "ENSMUSG00000037625"
## [10] "ENSMUSG00000032517" "ENSMUSG00000036634" "ENSMUSG00000037166"
## [13] "ENSMUSG00000020774" "ENSMUSG00000022090" "ENSMUSG00000020486"
## [16] "ENSMUSG00000026421" "ENSMUSG00000043448" "ENSMUSG00000090639"
## [19] "ENSMUSG00000073680" "ENSMUSG00000064351" "ENSMUSG00000026255"
## [22] "ENSMUSG00000038155" "ENSMUSG00000052727" "ENSMUSG00000101111"
## [25] "ENSMUSG00000041607" "ENSMUSG00000100862" "ENSMUSG00000024810"
## [28] "ENSMUSG00000090996" "ENSMUSG00000083563" "ENSMUSG00000039904"
## [1] ""
## [1] "ENSMUSG00000031610" "ENSMUSG00000026249" "ENSMUSG00000026424"
## [4] "ENSMUSG00000029231" "ENSMUSG00000019874" "ENSMUSG00000106379"
## [7] "ENSMUSG00000067879" "ENSMUSG00000032911" "ENSMUSG00000045532"
## [10] "ENSMUSG00000052229" "ENSMUSG00000079056" "ENSMUSG00000039830"
## [13] "ENSMUSG00000016995" "ENSMUSG00000032482" "ENSMUSG00000033208"
## [16] "ENSMUSG00000069763" "ENSMUSG00000068748" "ENSMUSG00000046160"
## [19] "ENSMUSG00000063297" "ENSMUSG00000017386" "ENSMUSG00000034164"
## [22] "ENSMUSG00000034353" "ENSMUSG00000034000" "ENSMUSG00000028655"
## [25] "ENSMUSG00000022122" "ENSMUSG00000069662" "ENSMUSG00000026955"
## [28] "ENSMUSG00000030317" "ENSMUSG00000006800" "ENSMUSG00000086596"
## [1] ""
## [1] ""
## Run the JackStraw resampling test (100 replicates) on the LFD object so that
## the significance of individual principal components can be assessed.
seurat_campbell_lfd <- JackStraw(
  object = seurat_campbell_lfd,
  num.replicate = 100,
  display.progress = TRUE
)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 20%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|==================== | 31%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 40%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
## Time Elapsed: 1.89125941197077 mins
## Draw the JackStraw plots for PCs 1 to 20.
# Maximum number of PCs allowed = 20.
JackStrawPlot(
  object = seurat_campbell_lfd,
  PCs = seq_len(20)
)
## Warning: Removed 20911 rows containing missing values (geom_point).
## An object of class seurat in project CAMPBELL_LFD
## 23273 genes across 3134 samples.
## Elbow plot: a cheaper heuristic than JackStraw for choosing how many
## principal components to keep.
PCElbowPlot(
  object = seurat_campbell_lfd
)
## Cluster the cells on PCs 1-16 at resolution 0.6, then print the parameters
## recorded for the clustering run.
seurat_campbell_lfd <- FindClusters(
  object = seurat_campbell_lfd,
  reduction.type = "pca",
  dims.use = 1:16,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
PrintFindClustersParams(object = seurat_campbell_lfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:14:19
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## t-SNE on PCs 1-16, plotted with cluster labels and without a legend
seurat_campbell_lfd <- RunTSNE(
  object = seurat_campbell_lfd,
  dims.use = 1:16,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_lfd)
TSNEPlot(
  object = seurat_campbell_lfd,
  no.legend = TRUE,
  do.label = TRUE
)
## 1986 variable genes, 20 PC = 14 clusters
## 1986 variable genes, 16 PC = 14 clusters
## Number of cells assigned to each cluster
table(seurat_campbell_lfd@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 639 617 496 347 206 200 122 120 107 89 77 50 35 29
#######
## Same clustering on PCs 1-16, now at resolution 1.0
seurat_campbell_lfd <- FindClusters(
  object = seurat_campbell_lfd,
  reduction.type = "pca",
  dims.use = 1:16,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
PrintFindClustersParams(object = seurat_campbell_lfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:14:19
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## t-SNE on PCs 1-16, plotted with cluster labels and without a legend.
## NOTE(review): this call takes the same arguments as the previous RunTSNE()
## call and none of them refers to the cluster labels - presumably the
## embedding is unchanged and the re-run is redundant; confirm before removing.
seurat_campbell_lfd <- RunTSNE(
  object = seurat_campbell_lfd,
  dims.use = 1:16,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_lfd)
TSNEPlot(
  object = seurat_campbell_lfd,
  no.legend = TRUE,
  do.label = TRUE
)
## 1986 variable genes, 20 PC = 18 clusters
## 1986 variable genes, 16 PC = 16 clusters
## Number of cells assigned to each cluster
table(seurat_campbell_lfd@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 584 508 491 312 225 206 141 122 113 107 89 86 50 36 35 29
#######
## Same clustering on PCs 1-16, now at resolution 1.5
seurat_campbell_lfd <- FindClusters(
  object = seurat_campbell_lfd,
  reduction.type = "pca",
  dims.use = 1:16,
  resolution = 1.5,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
PrintFindClustersParams(object = seurat_campbell_lfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:14:19
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## t-SNE on PCs 1-16, plotted with cluster labels and without a legend
seurat_campbell_lfd <- RunTSNE(
  object = seurat_campbell_lfd,
  dims.use = 1:16,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_lfd)
TSNEPlot(
  object = seurat_campbell_lfd,
  no.legend = TRUE,
  do.label = TRUE
)
## 1986 variable genes, 20 PC = 18 clusters
## 1986 variable genes, 16 PC = 18 clusters
## Number of cells assigned to each cluster
table(seurat_campbell_lfd@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 490 429 315 284 240 225 173 170 138 123 113 109 89 86 50 36 35 29
#######
## Same clustering on PCs 1-16, now at resolution 2.0
seurat_campbell_lfd <- FindClusters(
  object = seurat_campbell_lfd,
  reduction.type = "pca",
  dims.use = 1:16,
  resolution = 2.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
PrintFindClustersParams(object = seurat_campbell_lfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:14:19
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## t-SNE on PCs 1-16, plotted with cluster labels and without a legend
seurat_campbell_lfd <- RunTSNE(
  object = seurat_campbell_lfd,
  dims.use = 1:16,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_lfd)
TSNEPlot(
  object = seurat_campbell_lfd,
  no.legend = TRUE,
  do.label = TRUE
)
## 1986 variable genes, 20 PC = 20 clusters
## 1986 variable genes, 16 PC = 20 clusters
## Number of cells assigned to each cluster
table(seurat_campbell_lfd@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 380 287 255 253 242 224 205 173 168 140 139 123 113 107 89 86 50 36
## 18 19
## 35 29
## Final clustering: 1986 variable genes, PCs 1-16, resolution = 1.0
seurat_campbell_lfd <- FindClusters(
  object = seurat_campbell_lfd,
  reduction.type = "pca",
  dims.use = 1:16,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## t-SNE for the final parameters
seurat_campbell_lfd <- RunTSNE(
  object = seurat_campbell_lfd,
  dims.use = 1:16,
  do.fast = TRUE
)
Use 1986 variable genes and 16 principal components with a resolution of 1.0 (changing the number of PCs makes very little difference to the clustering). This gives a total of 16 clusters.
## Marker genes for each cluster versus all remaining cells; both up- and
## down-regulated genes are reported (only.pos = FALSE).
seurat_campbell_lfd_biomarkers <- FindAllMarkers(
  object = seurat_campbell_lfd,
  only.pos = FALSE,
  min.pct = 0.2
)
## Per cluster, keep the 10 markers with the largest avg_logFC
top10_seurat_campbell_markers <- seurat_campbell_lfd_biomarkers %>%
  group_by(cluster) %>%
  top_n(n = 10, wt = avg_logFC)
top10_seurat_campbell_markers
## # A tibble: 160 x 7
## # Groups: cluster [16]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 2.19e-149 0.871 0.793 0.218 5.09e-145 0 ENSMUSG00000054459
## 2 1.06e-145 0.895 0.981 0.461 2.46e-141 0 ENSMUSG00000047261
## 3 6.87e-140 0.886 0.916 0.346 1.60e-135 0 ENSMUSG00000027273
## 4 2.39e-131 0.830 0.94 0.409 5.56e-127 0 ENSMUSG00000027500
## 5 2.15e-113 0.890 0.899 0.403 4.99e-109 0 ENSMUSG00000000159
## 6 2.58e-113 0.871 0.786 0.281 6.01e-109 0 ENSMUSG00000035864
## 7 6.48e-107 0.811 0.981 0.727 1.51e-102 0 ENSMUSG00000021268
## 8 1.03e-101 0.871 0.536 0.135 2.41e- 97 0 ENSMUSG00000028222
## 9 8.29e- 23 1.28 0.337 0.159 1.93e- 18 0 ENSMUSG00000020660
## 10 3.17e- 17 1.20 0.267 0.129 7.38e- 13 0 ENSMUSG00000004366
## # ... with 150 more rows
# write.csv(as.data.frame(seurat_campbell_lfd_biomarkers), file = "seurat_campbell_lfd_biomarkers.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers), file = "top10_seurat_campbell_lfd_biomarkers.csv", quote = FALSE)
## Marker detection with the ROC test (test.use = "roc"). This can take a long time.
seurat_campbell_lfd_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_lfd,
  only.pos = FALSE,
  min.pct = 0.2,
  test.use = "roc"
)
## Per cluster, keep the 10 markers with the largest avg_logFC
top10_seurat_campbell_markers_ROC <- seurat_campbell_lfd_biomarkers_ROC %>%
  group_by(cluster) %>%
  top_n(n = 10, wt = avg_logFC)
top10_seurat_campbell_markers_ROC
## # A tibble: 160 x 9
## # Groups: cluster [16]
## myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0.825 0.895 0.650 0.895 0.981 0.461 NA 0 ENSMUSG00…
## 2 0.805 0.886 0.61 0.886 0.916 0.346 NA 0 ENSMUSG00…
## 3 0.803 0.830 0.606 0.830 0.94 0.409 NA 0 ENSMUSG00…
## 4 0.789 0.811 0.578 0.811 0.981 0.727 NA 0 ENSMUSG00…
## 5 0.787 0.871 0.574 0.871 0.793 0.218 NA 0 ENSMUSG00…
## 6 0.78 0.890 0.56 0.890 0.899 0.403 NA 0 ENSMUSG00…
## 7 0.77 0.775 0.54 0.775 0.988 0.823 NA 0 ENSMUSG00…
## 8 0.761 0.871 0.522 0.871 0.786 0.281 NA 0 ENSMUSG00…
## 9 0.708 0.778 0.416 0.778 0.63 0.21 NA 0 ENSMUSG00…
## 10 0.702 0.871 0.404 0.871 0.536 0.135 NA 0 ENSMUSG00…
## # ... with 150 more rows
# write.csv(as.data.frame(seurat_campbell_lfd_biomarkers_ROC), file = "seurat_campbell_lfd_biomarkers_ROC.csv", quote = FALSE)
# write.csv(as.data.frame(top10_seurat_campbell_markers_ROC), file = "top10_seurat_campbell_lfd_biomarkers_ROC.csv", quote = FALSE)
## Heatmap of the per-cluster top 10 marker genes selected above
DoHeatmap(
  object = seurat_campbell_lfd,
  genes.use = top10_seurat_campbell_markers$gene,
  slim.col.label = TRUE,
  remove.key = TRUE
)
## save seurat object as .rds
# saveRDS(seurat_campbell_lfd, file = "./seurat_campbell_lfd_final.rds")
## Read the saved HFD seurat object from disk and print its summary
seurat_campbell_hfd <- readRDS(
  file = "./seurat_campbell_hfd_just_created.rds"
)
seurat_campbell_hfd
## An object of class seurat in project CAMPBELL_HFD
## 23685 genes across 3778 samples.
## Import the table of mouse mitochondrially encoded genes.
## stringsAsFactors = FALSE keeps the IDs as character strings: when a factor is
## used as a matrix index R uses its integer level codes, which silently selects
## rows 1..k of the count matrix instead of the named genes.
mito_genes <- read.csv("mito_genes_table.csv", header = TRUE,
                       stringsAsFactors = FALSE)
## Character vector of mitochondrial gene IDs
## NOTE(review): assumes the id_with_url column holds plain gene IDs that match
## the row names of raw.data - confirm against the CSV.
mito_genes <- as.character(mito_genes$id_with_url)
## Keep only the mitochondrial gene IDs that are row names of the raw count matrix
mito_genes_present <- intersect(mito_genes, rownames(seurat_campbell_hfd@raw.data))
## Fail loudly rather than computing a mitochondrial fraction from zero genes
stopifnot(length(mito_genes_present) > 0)
## NOTE(review): values derived from these rows (percent_mito and the
## mitochondrial QC cut-off used for filtering) should be re-checked after this fix.
seurat_campbell_hfd@raw.data[mito_genes_present, 1:5]
## AAAAAATGGAGC AAAACGCGTGTC AAAACGGACAGA AAAACGTAACAG
## ENSMUSG00000000028 0 0 0 0
## ENSMUSG00000000037 0 0 0 0
## ENSMUSG00000000154 0 0 0 0
## ENSMUSG00000000149 1 0 0 0
## ENSMUSG00000000202 0 0 0 1
## ENSMUSG00000000159 0 1 5 0
## ENSMUSG00000000142 0 0 0 0
## ENSMUSG00000000127 0 1 0 0
## ENSMUSG00000000085 0 0 1 0
## ENSMUSG00000000194 0 0 0 0
## ENSMUSG00000000093 0 0 0 0
## ENSMUSG00000000168 0 0 0 0
## ENSMUSG00000000120 0 0 0 0
## ENSMUSG00000000134 0 0 0 0
## ENSMUSG00000000125 0 0 0 0
## ENSMUSG00000000197 0 0 0 0
## ENSMUSG00000000058 0 0 0 0
## ENSMUSG00000000167 0 0 0 0
## ENSMUSG00000000171 0 0 1 0
## ENSMUSG00000000056 0 0 0 0
## ENSMUSG00000000049 0 0 0 0
## ENSMUSG00000000184 0 0 1 3
## ENSMUSG00000000148 0 0 0 0
## ENSMUSG00000000078 0 0 0 1
## ENSMUSG00000000001 0 0 0 0
## ENSMUSG00000000223 0 0 0 0
## ENSMUSG00000000131 0 0 0 0
## ENSMUSG00000000183 0 0 0 0
## ENSMUSG00000000214 0 0 0 0
## ENSMUSG00000000088 0 0 1 0
## ENSMUSG00000000126 0 0 0 0
## ENSMUSG00000000031 0 0 0 0
## AAAACTAGTGGT
## ENSMUSG00000000028 0
## ENSMUSG00000000037 0
## ENSMUSG00000000154 0
## ENSMUSG00000000149 0
## ENSMUSG00000000202 0
## ENSMUSG00000000159 0
## ENSMUSG00000000142 0
## ENSMUSG00000000127 0
## ENSMUSG00000000085 0
## ENSMUSG00000000194 1
## ENSMUSG00000000093 0
## ENSMUSG00000000168 0
## ENSMUSG00000000120 0
## ENSMUSG00000000134 0
## ENSMUSG00000000125 0
## ENSMUSG00000000197 0
## ENSMUSG00000000058 0
## ENSMUSG00000000167 0
## ENSMUSG00000000171 0
## ENSMUSG00000000056 0
## ENSMUSG00000000049 0
## ENSMUSG00000000184 0
## ENSMUSG00000000148 0
## ENSMUSG00000000078 0
## ENSMUSG00000000001 1
## ENSMUSG00000000223 0
## ENSMUSG00000000131 0
## ENSMUSG00000000183 0
## ENSMUSG00000000214 0
## ENSMUSG00000000088 0
## ENSMUSG00000000126 0
## ENSMUSG00000000031 0
## Dimensions (rows = genes, columns = cells) of the raw-count subset selected by mito_genes_present
dim(seurat_campbell_hfd@raw.data[mito_genes_present, ])
## [1] 32 3778
## Fraction (0-1 scale, despite the variable name) of each cell's raw counts
## that comes from the mitochondrial genes.
## The IDs are coerced to character and restricted to existing row names first:
## a factor index would be interpreted as integer row positions and select the
## wrong genes.
mito_ids <- intersect(as.character(mito_genes_present),
                      rownames(seurat_campbell_hfd@raw.data))
stopifnot(length(mito_ids) > 0)
## drop = FALSE keeps a matrix even if only one mitochondrial gene is present
percent_mito <- Matrix::colSums(seurat_campbell_hfd@raw.data[mito_ids, , drop = FALSE]) /
  Matrix::colSums(seurat_campbell_hfd@raw.data)
## Basic stats of the mitochondrial fraction per cell
summary(percent_mito)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0008024 0.0014544 0.0017117 0.0023004 0.0085837
## Store the per-cell mitochondrial value in the metadata as "percent_mito"
seurat_campbell_hfd <- AddMetaData(
  object = seurat_campbell_hfd,
  metadata = percent_mito,
  col.name = "percent_mito"
)
## Inspect the first rows of the seurat object's metadata
head(seurat_campbell_hfd@meta.data)
## nGene nUMI orig.ident replicate_name percent_mito
## AAAAAATGGAGC 921 1362 CAMPBELL_HFD SRR5164440 0.0007342144
## AAAACGCGTGTC 1348 2221 CAMPBELL_HFD SRR5164440 0.0009009009
## AAAACGGACAGA 2127 3652 CAMPBELL_HFD SRR5164440 0.0024671053
## AAAACGTAACAG 1988 3575 CAMPBELL_HFD SRR5164440 0.0013993843
## AAAACTAGTGGT 1723 3175 CAMPBELL_HFD SRR5164440 0.0006301197
## AAAACTATCCCC 3099 6753 CAMPBELL_HFD SRR5164440 0.0029638411
## Violin plots of the three QC metrics: nGene, nUMI and percent_mito
VlnPlot(
  object = seurat_campbell_hfd,
  features.plot = c("nGene", "nUMI", "percent_mito"),
  nCol = 3,
  x.lab.rot = TRUE,
  point.size.use = 0.2
)
## Side-by-side scatter plots of nUMI against percent_mito and against nGene
par(mfrow = c(1, 2))
# GenePlot(object = seurat_campbell_hfd, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_hfd, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  object = seurat_campbell_hfd,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  pch.use = 16,
  cex.use = 0.5
)
GenePlot(
  object = seurat_campbell_hfd,
  gene1 = "nUMI",
  gene2 = "nGene",
  pch.use = 16,
  cex.use = 0.5
)
Filter out cells with more than 0.4% of total gene expression coming from mitochondrially encoded genes, or with more than 5000 genes expressed.
## Manual count of cells passing both cut-offs (percent_mito < 0.004 and
## nGene < 5000); all cells are already known to have >800 genes.
with(
  seurat_campbell_hfd@meta.data,
  table(percent_mito < 0.004 & nGene < 5000)
)
##
## FALSE TRUE
## 296 3482
# FALSE TRUE
# 296 3482
## Apply the QC thresholds: nGene between 800 and 5000, percent_mito up to
## 0.004 (0.4%) with no lower bound; then print the filtered object.
seurat_campbell_hfd <- FilterCells(
  object = seurat_campbell_hfd,
  subset.names = c("nGene", "percent_mito"),
  low.thresholds = c(800, -Inf),
  high.thresholds = c(5000, 0.004)
)
seurat_campbell_hfd
## An object of class seurat in project CAMPBELL_HFD
## 23685 genes across 3482 samples.
# An object of class seurat in project CAMPBELL_HFD
# 23685 genes across 3482 samples.
## Histogram of the per-cell expression totals prior to normalisation
hist(
  x = colSums(seurat_campbell_hfd@data),
  breaks = 100,
  main = "Total expression before normalisation",
  xlab = "Sum of expression"
)
## Normalise gene expression per cell (LogNormalize, scale factor 10000)
seurat_campbell_hfd <- NormalizeData(
  object = seurat_campbell_hfd,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)
## Histogram of the per-cell expression totals after normalisation.
## Matrix::colSums sums the columns directly, avoiding the conversion of the
## whole expression matrix to a dense data.frame just to total each cell.
hist(
  Matrix::colSums(seurat_campbell_hfd@data),
  breaks = 100,
  main = "Total expression after normalisation",
  xlab = "Sum of expression"
)
Find genes whose expression varies between cells; these genes will be used to construct the principal components that are then used for clustering.
## Select variable genes using the mean/dispersion cut-offs given below
seurat_campbell_hfd <- FindVariableGenes(
  object = seurat_campbell_hfd,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  x.low.cutoff = 0.05,
  x.high.cutoff = 3,
  y.cutoff = 0.75,
  num.bin = 20,
  binning.method = "equal_width"
)
## How many variable genes were selected
length(seurat_campbell_hfd@var.genes)
## [1] 1454
seurat_campbell_hfd <- FindVariableGenes(object = seurat_campbell_hfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )
== 1459 variable genes
seurat_campbell_hfd <- FindVariableGenes(object = seurat_campbell_hfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )
== 2305 variable genes
seurat_campbell_hfd <- FindVariableGenes(object = seurat_campbell_hfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.75, num.bin = 20, binning.method = "equal_width" )
== 1941 variable genes
seurat_campbell_hfd <- FindVariableGenes(object = seurat_campbell_hfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = "equal_width" )
== 3085 variable genes
seurat_campbell_hfd <- FindVariableGenes(object = seurat_campbell_hfd, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 40, binning.method = "equal_width" )
== 3034 variable genes
Scale gene expression per cell by building linear models for nUMI and percent_mito (mouse replicate is not included in the regression call below).
## Scale the data, regressing out nUMI and percent_mito.
## NOTE(review): mouse replicate is not in vars.to.regress, so it is not
## regressed here - confirm whether replicate_name was meant to be included.
seurat_campbell_hfd <- ScaleData(object = seurat_campbell_hfd, vars.to.regress = c("nUMI", "percent_mito"))
## Regressing out: nUMI, percent_mito
##
## Time Elapsed: 31.4707977771759 secs
## Scaling data matrix
Principal component analysis of variable genes for use in cell clustering.
## PCA restricted to the variable genes; print the top 5 genes for PCs 1-5
seurat_campbell_hfd <- RunPCA(
  object = seurat_campbell_hfd,
  pc.genes = seurat_campbell_hfd@var.genes,
  do.print = TRUE,
  pcs.print = 1:5,
  genes.print = 5
)
## [1] "PC1"
## [1] "ENSMUSG00000026385" "ENSMUSG00000031765" "ENSMUSG00000031762"
## [4] "ENSMUSG00000001025" "ENSMUSG00000026701"
## [1] ""
## [1] "ENSMUSG00000050711" "ENSMUSG00000043388" "ENSMUSG00000027350"
## [4] "ENSMUSG00000024261" "ENSMUSG00000035864"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000050711" "ENSMUSG00000055254" "ENSMUSG00000037852"
## [4] "ENSMUSG00000043388" "ENSMUSG00000031760"
## [1] ""
## [1] "ENSMUSG00000027199" "ENSMUSG00000027375" "ENSMUSG00000037625"
## [4] "ENSMUSG00000032854" "ENSMUSG00000076439"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000032060" "ENSMUSG00000076439"
## [4] "ENSMUSG00000026830" "ENSMUSG00000032854"
## [1] ""
## [1] "ENSMUSG00000020077" "ENSMUSG00000029484" "ENSMUSG00000079018"
## [4] "ENSMUSG00000041378" "ENSMUSG00000075602"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000036905" "ENSMUSG00000036887" "ENSMUSG00000036896"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715"
## [1] ""
## [1] "ENSMUSG00000041378" "ENSMUSG00000079018" "ENSMUSG00000030237"
## [4] "ENSMUSG00000030235" "ENSMUSG00000020154"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000094800"
## [4] "ENSMUSG00000072674" "ENSMUSG00000027744"
## [1] ""
## [1] "ENSMUSG00000022132" "ENSMUSG00000029838" "ENSMUSG00000022528"
## [4] "ENSMUSG00000024518" "ENSMUSG00000058897"
## [1] ""
## [1] ""
## Top genes associated with principal components 1-9
VizPCA(
  object = seurat_campbell_hfd,
  pcs.use = 1:9
)
## Scatter plot: PC1 against PC2
PCAPlot(
  object = seurat_campbell_hfd,
  dim.1 = 1,
  dim.2 = 2
)
## Scatter plot: PC2 against PC3
PCAPlot(
  object = seurat_campbell_hfd,
  dim.1 = 2,
  dim.2 = 3
)
## Expression heatmap of the PC1 genes
PCHeatmap(
  object = seurat_campbell_hfd,
  pc.use = 1,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Expression heatmap of the PC2 genes
PCHeatmap(
  object = seurat_campbell_hfd,
  pc.use = 2,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Expression heatmap of the PC3 genes
PCHeatmap(
  object = seurat_campbell_hfd,
  pc.use = 3,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Expression heatmap of the PC4 genes
PCHeatmap(
  object = seurat_campbell_hfd,
  pc.use = 4,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Expression heatmaps for PCs 5 to 18 in a single call
PCHeatmap(
  object = seurat_campbell_hfd,
  pc.use = 5:18,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Project the pre-computed PCA (built from the variable genes identified
## earlier) onto the entire dataset (all genes)
seurat_campbell_hfd <- ProjectPCA(
  object = seurat_campbell_hfd,
  do.print = TRUE
)
## [1] "PC1"
## [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
## [4] "ENSMUSG00000031762" "ENSMUSG00000001025" "ENSMUSG00000026701"
## [7] "ENSMUSG00000035805" "ENSMUSG00000050953" "ENSMUSG00000026728"
## [10] "ENSMUSG00000000567" "ENSMUSG00000067786" "ENSMUSG00000083138"
## [13] "ENSMUSG00000018593" "ENSMUSG00000058135" "ENSMUSG00000095538"
## [16] "ENSMUSG00000027712" "ENSMUSG00000005360" "ENSMUSG00000034467"
## [19] "ENSMUSG00000022528" "ENSMUSG00000026649" "ENSMUSG00000018102"
## [22] "ENSMUSG00000053931" "ENSMUSG00000017009" "ENSMUSG00000029838"
## [25] "ENSMUSG00000044080" "ENSMUSG00000036570" "ENSMUSG00000032231"
## [28] "ENSMUSG00000030342" "ENSMUSG00000031342" "ENSMUSG00000055254"
## [1] ""
## [1] "ENSMUSG00000033061" "ENSMUSG00000027581" "ENSMUSG00000050711"
## [4] "ENSMUSG00000047261" "ENSMUSG00000025468" "ENSMUSG00000022577"
## [7] "ENSMUSG00000026576" "ENSMUSG00000024268" "ENSMUSG00000019986"
## [10] "ENSMUSG00000029223" "ENSMUSG00000021087" "ENSMUSG00000043388"
## [13] "ENSMUSG00000042750" "ENSMUSG00000040785" "ENSMUSG00000055430"
## [16] "ENSMUSG00000044349" "ENSMUSG00000027500" "ENSMUSG00000036699"
## [19] "ENSMUSG00000019923" "ENSMUSG00000027350" "ENSMUSG00000035964"
## [22] "ENSMUSG00000027273" "ENSMUSG00000031840" "ENSMUSG00000048978"
## [25] "ENSMUSG00000060188" "ENSMUSG00000000159" "ENSMUSG00000033981"
## [28] "ENSMUSG00000020297" "ENSMUSG00000022658" "ENSMUSG00000059361"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000018451" "ENSMUSG00000021268" "ENSMUSG00000026223"
## [4] "ENSMUSG00000031428" "ENSMUSG00000033585" "ENSMUSG00000039278"
## [7] "ENSMUSG00000019986" "ENSMUSG00000026576" "ENSMUSG00000040785"
## [10] "ENSMUSG00000050071" "ENSMUSG00000006373" "ENSMUSG00000042750"
## [13] "ENSMUSG00000046432" "ENSMUSG00000055430" "ENSMUSG00000025579"
## [16] "ENSMUSG00000002265" "ENSMUSG00000022577" "ENSMUSG00000029223"
## [19] "ENSMUSG00000033061" "ENSMUSG00000043384" "ENSMUSG00000072964"
## [22] "ENSMUSG00000034723" "ENSMUSG00000050711" "ENSMUSG00000055254"
## [25] "ENSMUSG00000049422" "ENSMUSG00000024268" "ENSMUSG00000025468"
## [28] "ENSMUSG00000019923" "ENSMUSG00000058254" "ENSMUSG00000037852"
## [1] ""
## [1] "ENSMUSG00000027199" "ENSMUSG00000027375" "ENSMUSG00000037625"
## [4] "ENSMUSG00000032854" "ENSMUSG00000031425" "ENSMUSG00000076439"
## [7] "ENSMUSG00000031775" "ENSMUSG00000036634" "ENSMUSG00000026830"
## [10] "ENSMUSG00000022548" "ENSMUSG00000032517" "ENSMUSG00000027562"
## [13] "ENSMUSG00000050121" "ENSMUSG00000090639" "ENSMUSG00000022425"
## [16] "ENSMUSG00000020486" "ENSMUSG00000032060" "ENSMUSG00000033579"
## [19] "ENSMUSG00000041607" "ENSMUSG00000073680" "ENSMUSG00000006782"
## [22] "ENSMUSG00000046160" "ENSMUSG00000015090" "ENSMUSG00000070354"
## [25] "ENSMUSG00000037166" "ENSMUSG00000026888" "ENSMUSG00000020774"
## [28] "ENSMUSG00000022090" "ENSMUSG00000013523" "ENSMUSG00000027858"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000037625" "ENSMUSG00000032060" "ENSMUSG00000076439"
## [4] "ENSMUSG00000026830" "ENSMUSG00000031425" "ENSMUSG00000032854"
## [7] "ENSMUSG00000050121" "ENSMUSG00000006782" "ENSMUSG00000040759"
## [10] "ENSMUSG00000036634" "ENSMUSG00000032517" "ENSMUSG00000046160"
## [13] "ENSMUSG00000033579" "ENSMUSG00000015090" "ENSMUSG00000025203"
## [16] "ENSMUSG00000116358" "ENSMUSG00000073680" "ENSMUSG00000027858"
## [19] "ENSMUSG00000031342" "ENSMUSG00000068923" "ENSMUSG00000020774"
## [22] "ENSMUSG00000013523" "ENSMUSG00000022425" "ENSMUSG00000041607"
## [25] "ENSMUSG00000037166" "ENSMUSG00000030701" "ENSMUSG00000043448"
## [28] "ENSMUSG00000027562" "ENSMUSG00000050854" "ENSMUSG00000090996"
## [1] ""
## [1] "ENSMUSG00000020077" "ENSMUSG00000029484" "ENSMUSG00000079018"
## [4] "ENSMUSG00000041378" "ENSMUSG00000075602" "ENSMUSG00000030237"
## [7] "ENSMUSG00000030235" "ENSMUSG00000040584" "ENSMUSG00000020154"
## [10] "ENSMUSG00000056492" "ENSMUSG00000001123" "ENSMUSG00000058715"
## [13] "ENSMUSG00000036887" "ENSMUSG00000036905" "ENSMUSG00000032359"
## [16] "ENSMUSG00000016494" "ENSMUSG00000030579" "ENSMUSG00000036896"
## [19] "ENSMUSG00000028581" "ENSMUSG00000114487" "ENSMUSG00000024621"
## [22] "ENSMUSG00000039167" "ENSMUSG00000001946" "ENSMUSG00000036256"
## [25] "ENSMUSG00000022584" "ENSMUSG00000023992" "ENSMUSG00000021423"
## [28] "ENSMUSG00000020717" "ENSMUSG00000024397" "ENSMUSG00000033960"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000036905" "ENSMUSG00000036887" "ENSMUSG00000036896"
## [4] "ENSMUSG00000030579" "ENSMUSG00000058715" "ENSMUSG00000028581"
## [7] "ENSMUSG00000024621" "ENSMUSG00000023992" "ENSMUSG00000021423"
## [10] "ENSMUSG00000024397" "ENSMUSG00000038642" "ENSMUSG00000015852"
## [13] "ENSMUSG00000036353" "ENSMUSG00000048163" "ENSMUSG00000046805"
## [16] "ENSMUSG00000059498" "ENSMUSG00000000682" "ENSMUSG00000052160"
## [19] "ENSMUSG00000054675" "ENSMUSG00000002111" "ENSMUSG00000030786"
## [22] "ENSMUSG00000040747" "ENSMUSG00000040229" "ENSMUSG00000036908"
## [25] "ENSMUSG00000044811" "ENSMUSG00000027848" "ENSMUSG00000069516"
## [28] "ENSMUSG00000018008" "ENSMUSG00000021665" "ENSMUSG00000026786"
## [1] ""
## [1] "ENSMUSG00000041378" "ENSMUSG00000079018" "ENSMUSG00000030237"
## [4] "ENSMUSG00000030235" "ENSMUSG00000020154" "ENSMUSG00000040584"
## [7] "ENSMUSG00000056492" "ENSMUSG00000075602" "ENSMUSG00000039167"
## [10] "ENSMUSG00000001946" "ENSMUSG00000033960" "ENSMUSG00000022584"
## [13] "ENSMUSG00000036256" "ENSMUSG00000029648" "ENSMUSG00000114487"
## [16] "ENSMUSG00000020717" "ENSMUSG00000000805" "ENSMUSG00000039349"
## [19] "ENSMUSG00000054690" "ENSMUSG00000042116" "ENSMUSG00000031239"
## [22] "ENSMUSG00000027435" "ENSMUSG00000006386" "ENSMUSG00000026814"
## [25] "ENSMUSG00000061353" "ENSMUSG00000062960" "ENSMUSG00000024168"
## [28] "ENSMUSG00000024140" "ENSMUSG00000026193" "ENSMUSG00000051669"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000070306" "ENSMUSG00000043164" "ENSMUSG00000094800"
## [4] "ENSMUSG00000072674" "ENSMUSG00000027744" "ENSMUSG00000095304"
## [7] "ENSMUSG00000045655" "ENSMUSG00000108841" "ENSMUSG00000047671"
## [10] "ENSMUSG00000044772" "ENSMUSG00000020473" "ENSMUSG00000032595"
## [13] "ENSMUSG00000110332" "ENSMUSG00000027800" "ENSMUSG00000027867"
## [16] "ENSMUSG00000029182" "ENSMUSG00000038370" "ENSMUSG00000072473"
## [19] "ENSMUSG00000085416" "ENSMUSG00000051606" "ENSMUSG00000047139"
## [22] "ENSMUSG00000047394" "ENSMUSG00000037716" "ENSMUSG00000027360"
## [25] "ENSMUSG00000056174" "ENSMUSG00000046242" "ENSMUSG00000044475"
## [28] "ENSMUSG00000041323" "ENSMUSG00000060981" "ENSMUSG00000086742"
## [1] ""
## [1] "ENSMUSG00000022132" "ENSMUSG00000029838" "ENSMUSG00000022528"
## [4] "ENSMUSG00000024518" "ENSMUSG00000058897" "ENSMUSG00000021250"
## [7] "ENSMUSG00000063564" "ENSMUSG00000007682" "ENSMUSG00000018451"
## [10] "ENSMUSG00000045005" "ENSMUSG00000024190" "ENSMUSG00000035686"
## [13] "ENSMUSG00000033737" "ENSMUSG00000038418" "ENSMUSG00000034810"
## [16] "ENSMUSG00000017390" "ENSMUSG00000047786" "ENSMUSG00000061718"
## [19] "ENSMUSG00000028195" "ENSMUSG00000030629" "ENSMUSG00000003545"
## [22] "ENSMUSG00000027004" "ENSMUSG00000055653" "ENSMUSG00000052684"
## [25] "ENSMUSG00000034640" "ENSMUSG00000022419" "ENSMUSG00000041891"
## [28] "ENSMUSG00000053560" "ENSMUSG00000050708" "ENSMUSG00000030428"
## [1] ""
## [1] ""
## Run the JackStraw test (100 replicates) to investigate which principal
## components are statistically significant. Progress is printed while it runs.
seurat_campbell_hfd <- JackStraw(
  object = seurat_campbell_hfd,
  display.progress = TRUE,
  num.replicate = 100
)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 20%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|==================== | 31%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 40%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
## Time Elapsed: 2.13899685939153 mins
## Visualise the JackStraw results for PCs 1 to 20
## (20 is the maximum number of PCs allowed here).
JackStrawPlot(
  object = seurat_campbell_hfd,
  PCs = seq_len(20)
)
## Warning: Removed 20637 rows containing missing values (geom_point).
## An object of class seurat in project CAMPBELL_HFD
## 23685 genes across 3482 samples.
## An elbow plot is a less computationally intensive heuristic for choosing
## the statistically significant PCs.
PCElbowPlot(
  object = seurat_campbell_hfd
)
## Cluster cells on PCs 1-14 at resolution 0.6
seurat_campbell_hfd <- FindClusters(
  object = seurat_campbell_hfd,
  reduction.type = "pca",
  dims.use = 1:14,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## Print the parameters recorded for the FindClusters calculation
PrintFindClustersParams(object = seurat_campbell_hfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:57:51
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## Compute a t-SNE embedding from PCs 1-14
seurat_campbell_hfd <- RunTSNE(
  object = seurat_campbell_hfd,
  dims.use = 1:14,
  do.fast = TRUE
)
## Plot the embedding with cluster labels on the plot and no legend
# TSNEPlot(object = seurat_campbell_hfd)
TSNEPlot(
  object = seurat_campbell_hfd,
  do.label = TRUE,
  no.legend = TRUE
)
## 1454 variable genes, 20 PC = 14 clusters
## 1454 variable genes, 14 PC = 14 clusters
table(seurat_campbell_hfd@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 744 629 620 308 225 168 148 139 137 133 76 66 51 38
#######
## Re-cluster on PCs 1-14 at resolution 1.0
seurat_campbell_hfd <- FindClusters(
  object = seurat_campbell_hfd,
  reduction.type = "pca",
  dims.use = 1:14,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## NOTE(review): the output recorded below this call reports "Resolution: 0.6"
## although resolution = 1.0 was requested above - confirm which run is printed.
PrintFindClustersParams(object = seurat_campbell_hfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:57:51
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## t-SNE on PCs 1-14, then plot with the resolution 1.0 cluster labels
seurat_campbell_hfd <- RunTSNE(
  object = seurat_campbell_hfd,
  dims.use = 1:14,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_hfd)
TSNEPlot(
  object = seurat_campbell_hfd,
  do.label = TRUE,
  no.legend = TRUE
)
## 1454 variable genes, 20 PC = 16 clusters
## 1454 variable genes, 14 PC = 16 clusters
table(seurat_campbell_hfd@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 738 623 400 226 223 177 156 154 148 139 137 133 76 63 51 38
#######
## Re-cluster on PCs 1-14 at resolution 1.5
seurat_campbell_hfd <- FindClusters(
  object = seurat_campbell_hfd,
  reduction.type = "pca",
  dims.use = 1:14,
  resolution = 1.5,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## NOTE(review): the output recorded below this call reports "Resolution: 0.6"
## although resolution = 1.5 was requested above - confirm which run is printed.
PrintFindClustersParams(object = seurat_campbell_hfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:57:51
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## t-SNE on PCs 1-14, then plot with the resolution 1.5 cluster labels
seurat_campbell_hfd <- RunTSNE(
  object = seurat_campbell_hfd,
  dims.use = 1:14,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_hfd)
TSNEPlot(
  object = seurat_campbell_hfd,
  do.label = TRUE,
  no.legend = TRUE
)
## 1454 variable genes, 20 PC = 20 clusters
## 1454 variable genes, 14 PC = 18 clusters
table(seurat_campbell_hfd@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 549 400 363 256 227 223 198 167 157 154 148 140 137 133 76 63 51 40
#######
## Re-cluster on PCs 1-14 at resolution 2.0
seurat_campbell_hfd <- FindClusters(
  object = seurat_campbell_hfd,
  reduction.type = "pca",
  dims.use = 1:14,
  resolution = 2.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## NOTE(review): the output recorded below this call reports "Resolution: 0.6"
## although resolution = 2.0 was requested above - confirm which run is printed.
PrintFindClustersParams(object = seurat_campbell_hfd)
## Parameters used in latest FindClusters calculation run on: 2018-11-20 23:57:51
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## t-SNE on PCs 1-14, then plot with the resolution 2.0 cluster labels
seurat_campbell_hfd <- RunTSNE(
  object = seurat_campbell_hfd,
  dims.use = 1:14,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_hfd)
TSNEPlot(
  object = seurat_campbell_hfd,
  do.label = TRUE,
  no.legend = TRUE
)
## 1454 variable genes, 20 PC = 21 clusters
## 1454 variable genes, 14 PC = 21 clusters
table(seurat_campbell_hfd@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## 344 331 244 239 232 227 202 199 191 174 157 152 139 137 133 118 76 66
## 18 19 20
## 51 40 30
## Final clustering: 1454 variable genes, PCs 1-14, resolution = 1.0
seurat_campbell_hfd <- FindClusters(
  object = seurat_campbell_hfd,
  reduction.type = "pca",
  dims.use = 1:14,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## t-SNE embedding for the final parameters
seurat_campbell_hfd <- RunTSNE(
  object = seurat_campbell_hfd,
  dims.use = 1:14,
  do.fast = TRUE
)
Use 1454 variable genes and 14 principal components with a resolution of 1.0 (changing the number of PCs makes almost no difference to the clustering). This gives a total of 16 clusters.
## Find markers for every cluster against all remaining cells. Positive and
## negative genes are both reported (only.pos = FALSE), with min.pct = 0.2.
seurat_campbell_hfd_biomarkers <- FindAllMarkers(
  object = seurat_campbell_hfd,
  only.pos = FALSE,
  min.pct = 0.2
)
## Per cluster, keep the 10 markers with the highest avg_logFC
top10_seurat_campbell_markers <- top_n(
  group_by(seurat_campbell_hfd_biomarkers, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers
## # A tibble: 160 x 7
## # Groups: cluster [16]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 4.15e-142 0.909 0.943 0.503 9.82e-138 0 ENSMUSG00000000159
## 2 7.49e-118 0.658 0.973 0.582 1.77e-113 0 ENSMUSG00000047261
## 3 1.07e-106 0.603 0.978 0.774 2.53e-102 0 ENSMUSG00000055430
## 4 9.01e-101 0.605 0.932 0.499 2.14e- 96 0 ENSMUSG00000027500
## 5 8.43e- 93 0.570 0.881 0.427 2.00e- 88 0 ENSMUSG00000048978
## 6 6.28e- 91 0.596 0.64 0.227 1.49e- 86 0 ENSMUSG00000021032
## 7 3.92e- 23 0.896 0.215 0.087 9.27e- 19 0 ENSMUSG00000032291
## 8 1.10e- 22 0.701 0.375 0.203 2.61e- 18 0 ENSMUSG00000020660
## 9 9.08e- 22 0.683 0.404 0.229 2.15e- 17 0 ENSMUSG00000021647
## 10 6.81e- 17 1.09 0.289 0.158 1.61e- 12 0 ENSMUSG00000004366
## # ... with 150 more rows
## Export the full marker table and the per-cluster top 10 as CSV files
write.csv(
  as.data.frame(seurat_campbell_hfd_biomarkers),
  file = "seurat_campbell_hfd_biomarkers.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers),
  file = "top10_seurat_campbell_hfd_biomarkers.csv",
  quote = FALSE
)
## Repeat the marker search with the ROC test (test.use = "roc").
## This can take a long time.
seurat_campbell_hfd_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_hfd,
  only.pos = FALSE,
  min.pct = 0.2,
  test.use = "roc"
)
## Per cluster, keep the 10 markers with the highest avg_logFC
top10_seurat_campbell_markers_ROC <- top_n(
  group_by(seurat_campbell_hfd_biomarkers_ROC, cluster),
  10,
  avg_logFC
)
top10_seurat_campbell_markers_ROC
## # A tibble: 160 x 9
## # Groups: cluster [16]
## myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0.794 0.909 0.588 0.909 0.943 0.503 NA 0 ENSMUSG00…
## 2 0.771 0.658 0.542 0.658 0.973 0.582 NA 0 ENSMUSG00…
## 3 0.762 0.603 0.524 0.603 0.978 0.774 NA 0 ENSMUSG00…
## 4 0.746 0.605 0.492 0.605 0.932 0.499 NA 0 ENSMUSG00…
## 5 0.737 0.548 0.474 0.548 0.991 0.771 NA 0 ENSMUSG00…
## 6 0.736 0.558 0.472 0.558 0.921 0.474 NA 0 ENSMUSG00…
## 7 0.731 0.570 0.462 0.570 0.881 0.427 NA 0 ENSMUSG00…
## 8 0.729 0.551 0.458 0.551 0.935 0.561 NA 0 ENSMUSG00…
## 9 0.727 0.533 0.454 0.533 0.972 0.547 NA 0 ENSMUSG00…
## 10 0.722 0.555 0.444 0.555 0.989 0.88 NA 0 ENSMUSG00…
## # ... with 150 more rows
## Export the ROC marker table and its per-cluster top 10 as CSV files
write.csv(
  as.data.frame(seurat_campbell_hfd_biomarkers_ROC),
  file = "seurat_campbell_hfd_biomarkers_ROC.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers_ROC),
  file = "top10_seurat_campbell_hfd_biomarkers_ROC.csv",
  quote = FALSE
)
## Heatmap of the top 10 DE genes per cluster (colour key removed)
DoHeatmap(
  object = seurat_campbell_hfd,
  genes.use = top10_seurat_campbell_markers$gene,
  remove.key = TRUE,
  slim.col.label = TRUE
)
## Save the final HFD Seurat object to disk as an .rds file
saveRDS(
  seurat_campbell_hfd,
  file = "./seurat_campbell_hfd_final.rds"
)
## Load the previously saved REFED Seurat object and print its summary
seurat_campbell_refed <- readRDS(
  file = "./seurat_campbell_refed_just_created.rds"
)
seurat_campbell_refed
## An object of class seurat in project CAMPBELL_REFED
## 19267 genes across 2375 samples.
## Import the table of mouse mitochondrially encoded genes.
## stringsAsFactors = FALSE keeps the IDs as character. Indexing a matrix with
## a factor uses its integer codes, which silently selects the first rows of
## the matrix instead of the rows whose names match the gene IDs (the rows
## previously returned were ENSMUSG00000000001, ENSMUSG00000000028, ...).
mito_genes <- read.csv("mito_genes_table.csv",
                       header = TRUE,
                       stringsAsFactors = FALSE)
## Make a character vector of unique mitochondrial gene IDs:
mito_genes <- unique(as.character(mito_genes$id_with_url))
## Keep only the mitochondrial gene IDs present in this dataset. They are
## matched against the row names, so IDs that are absent are dropped instead
## of producing NA rows or an out-of-bounds error.
mito_genes_present <- mito_genes[
  mito_genes %in% rownames(seurat_campbell_refed@raw.data)
]
## Fail early if nothing matched: percent_mito would otherwise be 0 for every
## cell and the mitochondrial QC filter would do nothing.
if (length(mito_genes_present) == 0) {
  stop("No gene IDs from mito_genes_table.csv (column id_with_url) match ",
       "the row names of the raw data",
       call. = FALSE)
}
## NOTE(review): the percent_mito threshold used for filtering further down
## (0.004) was chosen from values computed with the factor-indexed rows -
## re-check it against the corrected values.
seurat_campbell_refed@raw.data[mito_genes_present, 1:5, drop = FALSE]
## AAAAATACCGGC AAAAATAGGCGT AAAAATAGGGTT AAAACATTACTT
## ENSMUSG00000000028 0 0 0 0
## ENSMUSG00000000037 0 0 0 0
## ENSMUSG00000000154 0 0 0 0
## ENSMUSG00000000149 0 0 0 0
## ENSMUSG00000000214 0 0 0 1
## ENSMUSG00000000159 0 0 2 2
## ENSMUSG00000000142 0 0 0 0
## ENSMUSG00000000127 0 1 1 0
## ENSMUSG00000000085 1 0 0 0
## ENSMUSG00000000197 0 0 1 1
## ENSMUSG00000000093 0 0 0 0
## ENSMUSG00000000168 0 0 0 0
## ENSMUSG00000000120 0 0 0 0
## ENSMUSG00000000134 0 0 0 0
## ENSMUSG00000000125 0 0 0 0
## ENSMUSG00000000202 0 0 0 0
## ENSMUSG00000000058 0 0 0 0
## ENSMUSG00000000167 0 0 0 0
## ENSMUSG00000000171 1 0 0 0
## ENSMUSG00000000056 0 1 0 0
## ENSMUSG00000000049 0 0 0 0
## ENSMUSG00000000194 0 0 0 0
## ENSMUSG00000000148 0 0 0 0
## ENSMUSG00000000078 1 0 0 0
## ENSMUSG00000000001 0 0 0 0
## ENSMUSG00000000247 0 0 0 0
## ENSMUSG00000000131 0 0 0 0
## ENSMUSG00000000184 0 1 0 0
## ENSMUSG00000000223 0 0 2 0
## ENSMUSG00000000088 0 0 0 0
## ENSMUSG00000000126 0 0 0 0
## ENSMUSG00000000031 0 0 0 0
## AAAAGACTAGTA
## ENSMUSG00000000028 0
## ENSMUSG00000000037 0
## ENSMUSG00000000154 0
## ENSMUSG00000000149 0
## ENSMUSG00000000214 0
## ENSMUSG00000000159 1
## ENSMUSG00000000142 0
## ENSMUSG00000000127 0
## ENSMUSG00000000085 0
## ENSMUSG00000000197 0
## ENSMUSG00000000093 0
## ENSMUSG00000000168 0
## ENSMUSG00000000120 0
## ENSMUSG00000000134 0
## ENSMUSG00000000125 0
## ENSMUSG00000000202 0
## ENSMUSG00000000058 0
## ENSMUSG00000000167 0
## ENSMUSG00000000171 0
## ENSMUSG00000000056 0
## ENSMUSG00000000049 0
## ENSMUSG00000000194 0
## ENSMUSG00000000148 0
## ENSMUSG00000000078 0
## ENSMUSG00000000001 0
## ENSMUSG00000000247 0
## ENSMUSG00000000131 0
## ENSMUSG00000000184 0
## ENSMUSG00000000223 0
## ENSMUSG00000000088 0
## ENSMUSG00000000126 0
## ENSMUSG00000000031 0
dim(seurat_campbell_refed@raw.data[mito_genes_present, ])
## [1] 32 2375
## Calculate, per cell, the proportion of counts (0-1 scale, so 0.004 = 0.4%)
## that come from mitochondrial genes.
## The IDs are coerced to character and matched against the row names: a
## factor index would be read as integer codes and select unrelated rows.
mito_ids <- unique(as.character(mito_genes_present))
mito_ids <- mito_ids[mito_ids %in% rownames(seurat_campbell_refed@raw.data)]
## drop = FALSE keeps a matrix even when only a single gene is matched.
percent_mito <- Matrix::colSums(
  seurat_campbell_refed@raw.data[mito_ids, , drop = FALSE]
) / Matrix::colSums(seurat_campbell_refed@raw.data)
## Basic stats of proportion of mitocondrial gene expression per cell
summary(percent_mito)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0008132 0.0015576 0.0018138 0.0025196 0.0097259
## Store the per-cell mitochondrial proportion in the object's metadata
## under the column name "percent_mito"
seurat_campbell_refed <- AddMetaData(
  object = seurat_campbell_refed,
  metadata = percent_mito,
  col.name = "percent_mito"
)
## Inspect the first rows of the Seurat object metadata
head(seurat_campbell_refed@meta.data)
## nGene nUMI orig.ident replicate_name percent_mito
## AAAAATACCGGC 1271 2224 CAMPBELL_REFED SRR5164442 0.0013519603
## AAAAATAGGCGT 1088 1900 CAMPBELL_REFED SRR5164442 0.0015797788
## AAAAATAGGGTT 3056 6629 CAMPBELL_REFED SRR5164442 0.0009057971
## AAAACATTACTT 841 1535 CAMPBELL_REFED SRR5164442 0.0026058632
## AAAAGACTAGTA 1195 1925 CAMPBELL_REFED SRR5164442 0.0005208333
## AAAAGATCATAC 966 1827 CAMPBELL_REFED SRR5164442 0.0005473454
## QC violin plots: number of genes, number of UMIs and mitochondrial
## proportion, drawn side by side in three columns
VlnPlot(
  object = seurat_campbell_refed,
  features.plot = c("nGene", "nUMI", "percent_mito"),
  nCol = 3,
  point.size.use = 0.2,
  x.lab.rot = TRUE
)
## QC plots showing the relationship between nUMI and the mitochondrial
## proportion, and between nUMI and the number of genes.
## The previous graphics settings are saved and restored afterwards so that
## the 1 x 2 layout does not leak into later plots.
old_par <- par(mfrow = c(1, 2))
# GenePlot(object = seurat_campbell_refed, gene1 = "nUMI", gene2 = "percent_mito", do.hover = TRUE, pch.use = 1)
# GenePlot(object = seurat_campbell_refed, gene1 = "nUMI", gene2 = "nGene", do.hover = TRUE, pch.use = 1)
GenePlot(
  object = seurat_campbell_refed,
  gene1 = "nUMI",
  gene2 = "percent_mito",
  pch.use = 16,
  cex.use = 0.5
)
GenePlot(
  object = seurat_campbell_refed,
  gene1 = "nUMI",
  gene2 = "nGene",
  pch.use = 16,
  cex.use = 0.5
)
par(old_par)
Filter out cells with more than 0.4% of total gene expression coming from mitochondrially encoded genes, or with more than 3500 genes expressed.
## Manual check of how many cells pass both upper thresholds.
## The lower nGene bound is not tested: all cells are already known to have
## more than 800 genes.
table(
  seurat_campbell_refed@meta.data$nGene < 3500 &
    seurat_campbell_refed@meta.data$percent_mito < 0.004
)
##
## FALSE TRUE
## 201 2174
# FALSE TRUE
# 201 2174
## Filter cells on nGene (thresholds 800 and 3500) and on percent_mito
## (no lower threshold, upper threshold 0.004, i.e. 0.4%)
seurat_campbell_refed <- FilterCells(
  object = seurat_campbell_refed,
  subset.names = c("nGene", "percent_mito"),
  low.thresholds = c(800, -Inf),
  high.thresholds = c(3500, 0.004)
)
## Print the object summary to confirm the number of remaining cells
seurat_campbell_refed
## An object of class seurat in project CAMPBELL_REFED
## 19267 genes across 2174 samples.
# An object of class seurat in project CAMPBELL_REFED
# 19267 genes across 2174 samples.
## Histogram of the per-cell total expression before normalisation
hist(
  colSums(seurat_campbell_refed@data),
  main = "Total expression before normalisation",
  xlab = "Sum of expression",
  breaks = 100
)
## Normalise gene expression per cell ("LogNormalize", scale factor 10000)
seurat_campbell_refed <- NormalizeData(
  object = seurat_campbell_refed,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)
## Histogram of the per-cell total expression after normalisation.
## Matrix::colSums sums the columns of the stored matrix directly, so the whole
## expression matrix no longer has to be copied into a dense matrix and then a
## data.frame just to compute column totals.
hist(
  Matrix::colSums(seurat_campbell_refed@data),
  breaks = 100,
  main = "Total expression after normalisation",
  xlab = "Sum of expression"
)
Find genes whose expression varies between cells; these will be used to construct the principal components that are then used for clustering.
## Find variable genes from mean expression (ExpMean) and dispersion (LogVMR),
## using 20 equal-width bins and the cutoffs given below
seurat_campbell_refed <- FindVariableGenes(
  object = seurat_campbell_refed,
  mean.function = ExpMean,
  dispersion.function = LogVMR,
  x.low.cutoff = 0.05,
  x.high.cutoff = 3,
  y.cutoff = 0.75,
  num.bin = 20,
  binning.method = "equal_width"
)
## Number of variable genes selected
length(seurat_campbell_refed@var.genes)
## [1] 1451
seurat_campbell_refed <- FindVariableGenes(object = seurat_campbell_refed, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 4, y.cutoff = 0.75, num.bin = 20, binning.method = “equal_width” )
== 1456 variable genes
seurat_campbell_refed <- FindVariableGenes(object = seurat_campbell_refed, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.05, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = “equal_width” )
== 2293 variable genes
seurat_campbell_refed <- FindVariableGenes(object = seurat_campbell_refed, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.75, num.bin = 20, binning.method = “equal_width” )
== 1916 variable genes
seurat_campbell_refed <- FindVariableGenes(object = seurat_campbell_refed, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 20, binning.method = “equal_width” )
== 2916 variable genes
seurat_campbell_refed <- FindVariableGenes(object = seurat_campbell_refed, mean.function = ExpMean, dispersion.function = LogVMR, x.low.cutoff = 0.02, x.high.cutoff = 3, y.cutoff = 0.5, num.bin = 40, binning.method = “equal_width” )
== 2831 variable genes
Scale gene expression and regress out nUMI and percent_mito by fitting linear models (mouse replicate is not among the regressed variables in the call below).
## Scale the data, regressing out nUMI and percent_mito.
## NOTE(review): mouse replicate (replicate_name) is not passed to
## vars.to.regress - confirm whether it was meant to be regressed out too.
seurat_campbell_refed <- ScaleData(
  object = seurat_campbell_refed,
  vars.to.regress = c("nUMI", "percent_mito")
)
## Regressing out: nUMI, percent_mito
##
## Time Elapsed: 22.1721687316895 secs
## Scaling data matrix
Principal component analysis of variable genes for use in cell clustering.
## Run PCA on the variable genes, printing 5 genes for each of PCs 1-5
seurat_campbell_refed <- RunPCA(
  object = seurat_campbell_refed,
  pc.genes = seurat_campbell_refed@var.genes,
  do.print = TRUE,
  pcs.print = 1:5,
  genes.print = 5
)
## [1] "PC1"
## [1] "ENSMUSG00000027500" "ENSMUSG00000027350" "ENSMUSG00000035864"
## [4] "ENSMUSG00000040856" "ENSMUSG00000026787"
## [1] ""
## [1] "ENSMUSG00000026385" "ENSMUSG00000031765" "ENSMUSG00000050953"
## [4] "ENSMUSG00000018593" "ENSMUSG00000000567"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000079018" "ENSMUSG00000041378" "ENSMUSG00000040584"
## [4] "ENSMUSG00000020154" "ENSMUSG00000056492"
## [1] ""
## [1] "ENSMUSG00000055254" "ENSMUSG00000067786" "ENSMUSG00000000567"
## [4] "ENSMUSG00000035805" "ENSMUSG00000026701"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000027350" "ENSMUSG00000027800" "ENSMUSG00000075602"
## [4] "ENSMUSG00000023175" "ENSMUSG00000038370"
## [1] ""
## [1] "ENSMUSG00000037625" "ENSMUSG00000036634" "ENSMUSG00000031425"
## [4] "ENSMUSG00000032517" "ENSMUSG00000032854"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000020423" "ENSMUSG00000003545" "ENSMUSG00000022528"
## [4] "ENSMUSG00000022132" "ENSMUSG00000026185"
## [1] ""
## [1] "ENSMUSG00000043164" "ENSMUSG00000070306" "ENSMUSG00000094800"
## [4] "ENSMUSG00000110332" "ENSMUSG00000020473"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000029838" "ENSMUSG00000047786" "ENSMUSG00000055254"
## [4] "ENSMUSG00000063564" "ENSMUSG00000007682"
## [1] ""
## [1] "ENSMUSG00000056973" "ENSMUSG00000028298" "ENSMUSG00000032532"
## [4] "ENSMUSG00000032081" "ENSMUSG00000045394"
## [1] ""
## [1] ""
## Visualise the top genes associated with principal components 1-9
VizPCA(
  object = seurat_campbell_refed,
  pcs.use = 1:9
)
## Scatter plot of principal component 1 vs 2
PCAPlot(
  object = seurat_campbell_refed,
  dim.1 = 1,
  dim.2 = 2
)
## Scatter plot of principal component 2 vs 3
PCAPlot(
  object = seurat_campbell_refed,
  dim.1 = 2,
  dim.2 = 3
)
## Heatmap of gene expression for the genes of principal component 1
## (500 cells, balanced gene selection, column labels hidden)
PCHeatmap(
  object = seurat_campbell_refed,
  pc.use = 1,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heatmap of gene expression for the genes of principal component 2
## (500 cells, balanced gene selection, column labels hidden)
PCHeatmap(
  object = seurat_campbell_refed,
  pc.use = 2,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heatmap of gene expression for the genes of principal component 3
## (500 cells, balanced gene selection, column labels hidden)
PCHeatmap(
  object = seurat_campbell_refed,
  pc.use = 3,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## Heatmap of gene expression for the genes of principal component 4
## (500 cells, balanced gene selection, column labels hidden)
PCHeatmap(
  object = seurat_campbell_refed,
  pc.use = 4,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col =
## col.use, : Discrepancy: Rowv is FALSE, while dendrogram is `both'. Omitting
## row dendogram.
## Warning in heatmap.2(data.use, Rowv = NA, Colv = NA, trace = "none", col
## = col.use, : Discrepancy: Colv is FALSE, while dendrogram is `column'.
## Omitting column dendogram.
## Warning in plot.window(...): "dimTitle" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "dimTitle" is not a graphical parameter
## Warning in title(...): "dimTitle" is not a graphical parameter
# Ignore warnings!
## One heatmap per principal component for PCs 5 to 18
## (500 cells each, balanced gene selection, column labels hidden)
PCHeatmap(
  object = seurat_campbell_refed,
  pc.use = 5:18,
  label.columns = FALSE,
  do.balanced = TRUE,
  cells.use = 500
)
## Project the pre-computed PCA (built from the variable genes identified
## earlier) onto the entire dataset (all genes), printing the top genes per PC
seurat_campbell_refed <- ProjectPCA(
  object = seurat_campbell_refed,
  do.print = TRUE
)
## [1] "PC1"
## [1] "ENSMUSG00000050711" "ENSMUSG00000033061" "ENSMUSG00000044349"
## [4] "ENSMUSG00000021268" "ENSMUSG00000019986" "ENSMUSG00000055430"
## [7] "ENSMUSG00000027581" "ENSMUSG00000021087" "ENSMUSG00000029223"
## [10] "ENSMUSG00000040785" "ENSMUSG00000026576" "ENSMUSG00000043388"
## [13] "ENSMUSG00000024268" "ENSMUSG00000047261" "ENSMUSG00000024261"
## [16] "ENSMUSG00000027273" "ENSMUSG00000019923" "ENSMUSG00000042750"
## [19] "ENSMUSG00000025468" "ENSMUSG00000000159" "ENSMUSG00000036699"
## [22] "ENSMUSG00000002265" "ENSMUSG00000022577" "ENSMUSG00000107169"
## [25] "ENSMUSG00000024423" "ENSMUSG00000025579" "ENSMUSG00000027500"
## [28] "ENSMUSG00000003363" "ENSMUSG00000027350" "ENSMUSG00000033981"
## [1] ""
## [1] "ENSMUSG00000002985" "ENSMUSG00000026385" "ENSMUSG00000031765"
## [4] "ENSMUSG00000050953" "ENSMUSG00000018593" "ENSMUSG00000000567"
## [7] "ENSMUSG00000026728" "ENSMUSG00000001025" "ENSMUSG00000058135"
## [10] "ENSMUSG00000031762" "ENSMUSG00000026701" "ENSMUSG00000035805"
## [13] "ENSMUSG00000030342" "ENSMUSG00000005360" "ENSMUSG00000008575"
## [16] "ENSMUSG00000067786" "ENSMUSG00000018102" "ENSMUSG00000034467"
## [19] "ENSMUSG00000095538" "ENSMUSG00000028565" "ENSMUSG00000017390"
## [22] "ENSMUSG00000053931" "ENSMUSG00000032231" "ENSMUSG00000026547"
## [25] "ENSMUSG00000028517" "ENSMUSG00000036570" "ENSMUSG00000028195"
## [28] "ENSMUSG00000021250" "ENSMUSG00000044080" "ENSMUSG00000062078"
## [1] ""
## [1] ""
## [1] "PC2"
## [1] "ENSMUSG00000079018" "ENSMUSG00000041378" "ENSMUSG00000040584"
## [4] "ENSMUSG00000020154" "ENSMUSG00000056492" "ENSMUSG00000033960"
## [7] "ENSMUSG00000075602" "ENSMUSG00000030237" "ENSMUSG00000001946"
## [10] "ENSMUSG00000042745" "ENSMUSG00000030235" "ENSMUSG00000039167"
## [13] "ENSMUSG00000029648" "ENSMUSG00000020717" "ENSMUSG00000054404"
## [16] "ENSMUSG00000061353" "ENSMUSG00000034738" "ENSMUSG00000006386"
## [19] "ENSMUSG00000022584" "ENSMUSG00000031871" "ENSMUSG00000029802"
## [22] "ENSMUSG00000040732" "ENSMUSG00000114487" "ENSMUSG00000027435"
## [25] "ENSMUSG00000020077" "ENSMUSG00000062960" "ENSMUSG00000032035"
## [28] "ENSMUSG00000054690" "ENSMUSG00000031239" "ENSMUSG00000042116"
## [1] ""
## [1] "ENSMUSG00000018451" "ENSMUSG00000055254" "ENSMUSG00000067786"
## [4] "ENSMUSG00000000567" "ENSMUSG00000035805" "ENSMUSG00000026701"
## [7] "ENSMUSG00000001025" "ENSMUSG00000026385" "ENSMUSG00000031760"
## [10] "ENSMUSG00000017390" "ENSMUSG00000031762" "ENSMUSG00000005360"
## [13] "ENSMUSG00000025666" "ENSMUSG00000034467" "ENSMUSG00000021379"
## [16] "ENSMUSG00000058135" "ENSMUSG00000031765" "ENSMUSG00000050953"
## [19] "ENSMUSG00000095538" "ENSMUSG00000004558" "ENSMUSG00000052727"
## [22] "ENSMUSG00000026223" "ENSMUSG00000036570" "ENSMUSG00000030428"
## [25] "ENSMUSG00000031342" "ENSMUSG00000024411" "ENSMUSG00000026649"
## [28] "ENSMUSG00000037852" "ENSMUSG00000026546" "ENSMUSG00000005089"
## [1] ""
## [1] ""
## [1] "PC3"
## [1] "ENSMUSG00000021268" "ENSMUSG00000050711" "ENSMUSG00000040785"
## [4] "ENSMUSG00000043384" "ENSMUSG00000033061" "ENSMUSG00000018451"
## [7] "ENSMUSG00000019986" "ENSMUSG00000026223" "ENSMUSG00000026576"
## [10] "ENSMUSG00000044349" "ENSMUSG00000027350" "ENSMUSG00000072964"
## [13] "ENSMUSG00000006373" "ENSMUSG00000036438" "ENSMUSG00000027800"
## [16] "ENSMUSG00000024261" "ENSMUSG00000075602" "ENSMUSG00000031428"
## [19] "ENSMUSG00000002265" "ENSMUSG00000022661" "ENSMUSG00000023175"
## [22] "ENSMUSG00000038370" "ENSMUSG00000042745" "ENSMUSG00000070802"
## [25] "ENSMUSG00000042750" "ENSMUSG00000056492" "ENSMUSG00000032181"
## [28] "ENSMUSG00000027712" "ENSMUSG00000024268" "ENSMUSG00000041378"
## [1] ""
## [1] "ENSMUSG00000037625" "ENSMUSG00000036634" "ENSMUSG00000031425"
## [4] "ENSMUSG00000032517" "ENSMUSG00000032854" "ENSMUSG00000076439"
## [7] "ENSMUSG00000041607" "ENSMUSG00000006782" "ENSMUSG00000026830"
## [10] "ENSMUSG00000027199" "ENSMUSG00000031775" "ENSMUSG00000115529"
## [13] "ENSMUSG00000032060" "ENSMUSG00000046160" "ENSMUSG00000020486"
## [16] "ENSMUSG00000056966" "ENSMUSG00000013523" "ENSMUSG00000038173"
## [19] "ENSMUSG00000022425" "ENSMUSG00000027562" "ENSMUSG00000015090"
## [22] "ENSMUSG00000090639" "ENSMUSG00000050121" "ENSMUSG00000032556"
## [25] "ENSMUSG00000027858" "ENSMUSG00000015149" "ENSMUSG00000070354"
## [28] "ENSMUSG00000027375" "ENSMUSG00000047976" "ENSMUSG00000040759"
## [1] ""
## [1] ""
## [1] "PC4"
## [1] "ENSMUSG00000020423" "ENSMUSG00000003545" "ENSMUSG00000022528"
## [4] "ENSMUSG00000022132" "ENSMUSG00000026185" "ENSMUSG00000007682"
## [7] "ENSMUSG00000052837" "ENSMUSG00000028195" "ENSMUSG00000038418"
## [10] "ENSMUSG00000021250" "ENSMUSG00000033737" "ENSMUSG00000030629"
## [13] "ENSMUSG00000028298" "ENSMUSG00000058897" "ENSMUSG00000052387"
## [16] "ENSMUSG00000029838" "ENSMUSG00000047786" "ENSMUSG00000019997"
## [19] "ENSMUSG00000032532" "ENSMUSG00000045005" "ENSMUSG00000044786"
## [22] "ENSMUSG00000032081" "ENSMUSG00000063564" "ENSMUSG00000034640"
## [25] "ENSMUSG00000003949" "ENSMUSG00000019960" "ENSMUSG00000034810"
## [28] "ENSMUSG00000045394" "ENSMUSG00000064351" "ENSMUSG00000022419"
## [1] ""
## [1] "ENSMUSG00000043164" "ENSMUSG00000070306" "ENSMUSG00000094800"
## [4] "ENSMUSG00000110332" "ENSMUSG00000020473" "ENSMUSG00000038370"
## [7] "ENSMUSG00000072674" "ENSMUSG00000032595" "ENSMUSG00000041323"
## [10] "ENSMUSG00000027800" "ENSMUSG00000045655" "ENSMUSG00000044772"
## [13] "ENSMUSG00000033208" "ENSMUSG00000047394" "ENSMUSG00000108841"
## [16] "ENSMUSG00000026683" "ENSMUSG00000047139" "ENSMUSG00000022037"
## [19] "ENSMUSG00000021879" "ENSMUSG00000027744" "ENSMUSG00000029182"
## [22] "ENSMUSG00000051606" "ENSMUSG00000021270" "ENSMUSG00000021950"
## [25] "ENSMUSG00000047361" "ENSMUSG00000044475" "ENSMUSG00000029309"
## [28] "ENSMUSG00000026301" "ENSMUSG00000056174" "ENSMUSG00000021087"
## [1] ""
## [1] ""
## [1] "PC5"
## [1] "ENSMUSG00000018451" "ENSMUSG00000029838" "ENSMUSG00000047786"
## [4] "ENSMUSG00000055254" "ENSMUSG00000063564" "ENSMUSG00000007682"
## [7] "ENSMUSG00000045005" "ENSMUSG00000017390" "ENSMUSG00000030428"
## [10] "ENSMUSG00000056380" "ENSMUSG00000015222" "ENSMUSG00000058897"
## [13] "ENSMUSG00000035686" "ENSMUSG00000027004" "ENSMUSG00000064339"
## [16] "ENSMUSG00000031342" "ENSMUSG00000033737" "ENSMUSG00000038156"
## [19] "ENSMUSG00000005360" "ENSMUSG00000052727" "ENSMUSG00000031626"
## [22] "ENSMUSG00000030905" "ENSMUSG00000058254" "ENSMUSG00000021087"
## [25] "ENSMUSG00000021732" "ENSMUSG00000055430" "ENSMUSG00000026701"
## [28] "ENSMUSG00000005089" "ENSMUSG00000040856" "ENSMUSG00000093460"
## [1] ""
## [1] "ENSMUSG00000056973" "ENSMUSG00000028298" "ENSMUSG00000032532"
## [4] "ENSMUSG00000032081" "ENSMUSG00000045394" "ENSMUSG00000015053"
## [7] "ENSMUSG00000027857" "ENSMUSG00000023043" "ENSMUSG00000066720"
## [10] "ENSMUSG00000039994" "ENSMUSG00000021194" "ENSMUSG00000028023"
## [13] "ENSMUSG00000051367" "ENSMUSG00000049382" "ENSMUSG00000021508"
## [16] "ENSMUSG00000021506" "ENSMUSG00000018569" "ENSMUSG00000052974"
## [19] "ENSMUSG00000031380" "ENSMUSG00000054889" "ENSMUSG00000027350"
## [22] "ENSMUSG00000026185" "ENSMUSG00000000037" "ENSMUSG00000036192"
## [25] "ENSMUSG00000040569" "ENSMUSG00000037664" "ENSMUSG00000039943"
## [28] "ENSMUSG00000068154" "ENSMUSG00000046743" "ENSMUSG00000017723"
## [1] ""
## [1] ""
## Run the JackStraw statistical test (100 replicates) to investigate which PCs are statistically significant.
seurat_campbell_refed <- JackStraw(
  object = seurat_campbell_refed,
  num.replicate = 100,
  display.progress = TRUE
)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 20%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|==================== | 31%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 40%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
## Time Elapsed: 1.65351190964381 mins
## Visualise the JackStraw plots for PCs 1 to 20 (20 is the maximum number of PCs allowed).
JackStrawPlot(
  object = seurat_campbell_refed,
  PCs = 1:20
)
## Warning: Removed 20801 rows containing missing values (geom_point).
## An object of class seurat in project CAMPBELL_REFED
## 19267 genes across 2174 samples.
## An elbow plot is a less computationally intensive heuristic for choosing the statistically significant PCs
PCElbowPlot(
  object = seurat_campbell_refed
)
## Cluster cells on PCs 1 to 13 at resolution 0.6, then print the stored clustering parameters
seurat_campbell_refed <- FindClusters(
  object = seurat_campbell_refed,
  reduction.type = "pca",
  dims.use = 1:13,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
PrintFindClustersParams(
  object = seurat_campbell_refed
)
## Parameters used in latest FindClusters calculation run on: 2018-11-21 00:44:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## t-SNE embedding on the same 13 PCs that were used for clustering
seurat_campbell_refed <- RunTSNE(
  object = seurat_campbell_refed,
  dims.use = 1:13,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_refed)
TSNEPlot(
  object = seurat_campbell_refed,
  no.legend = TRUE,
  do.label = TRUE
)
## 1451 variable genes, 13 PCs, resolution 0.6 = 9 clusters
## Number of cells per cluster:
table(seurat_campbell_refed@ident)
##
## 0 1 2 3 4 5 6 7 8
## 732 385 334 304 112 100 96 75 36
#######
## Re-cluster on PCs 1 to 13 at resolution 1.0, then print the stored clustering parameters
seurat_campbell_refed <- FindClusters(
  object = seurat_campbell_refed,
  reduction.type = "pca",
  dims.use = 1:13,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## NOTE(review): the rendered output of this call reports "Resolution: 0.6" with an
## unchanged timestamp - confirm it reflects this run (TODO: check whether passing
## `resolution` to PrintFindClustersParams is needed for the installed Seurat version).
PrintFindClustersParams(
  object = seurat_campbell_refed
)
## Parameters used in latest FindClusters calculation run on: 2018-11-21 00:44:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## t-SNE embedding on PCs 1 to 13, labelled with the resolution 1.0 clusters
## NOTE(review): same dims.use as the earlier RunTSNE call - presumably this re-run
## only reproduces the same embedding; confirm before removing.
seurat_campbell_refed <- RunTSNE(
  object = seurat_campbell_refed,
  dims.use = 1:13,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_refed)
TSNEPlot(
  object = seurat_campbell_refed,
  no.legend = TRUE,
  do.label = TRUE
)
## 1451 variable genes, 13 PCs, resolution 1.0 = 11 clusters
## Number of cells per cluster:
table(seurat_campbell_refed@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 376 374 370 324 233 111 100 95 79 76 36
#######
## Re-cluster on PCs 1 to 13 at resolution 1.5, then print the stored clustering parameters
seurat_campbell_refed <- FindClusters(
  object = seurat_campbell_refed,
  reduction.type = "pca",
  dims.use = 1:13,
  resolution = 1.5,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## NOTE(review): the rendered output of this call reports "Resolution: 0.6" with an
## unchanged timestamp - confirm it reflects this run (TODO: check whether passing
## `resolution` to PrintFindClustersParams is needed for the installed Seurat version).
PrintFindClustersParams(
  object = seurat_campbell_refed
)
## Parameters used in latest FindClusters calculation run on: 2018-11-21 00:44:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## t-SNE embedding on PCs 1 to 13, labelled with the resolution 1.5 clusters
## NOTE(review): same dims.use as the earlier RunTSNE call - presumably this re-run
## only reproduces the same embedding; confirm before removing.
seurat_campbell_refed <- RunTSNE(
  object = seurat_campbell_refed,
  dims.use = 1:13,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_refed)
TSNEPlot(
  object = seurat_campbell_refed,
  no.legend = TRUE,
  do.label = TRUE
)
## 1451 variable genes, 13 PCs, resolution 1.5 = 14 clusters
## Number of cells per cluster:
table(seurat_campbell_refed@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 351 335 263 224 219 206 111 100 80 77 77 76 36 19
#######
## Re-cluster on PCs 1 to 13 at resolution 2.0, then print the stored clustering parameters
seurat_campbell_refed <- FindClusters(
  object = seurat_campbell_refed,
  reduction.type = "pca",
  dims.use = 1:13,
  resolution = 2.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## NOTE(review): the rendered output of this call reports "Resolution: 0.6" with an
## unchanged timestamp - confirm it reflects this run (TODO: check whether passing
## `resolution` to PrintFindClustersParams is needed for the installed Seurat version).
PrintFindClustersParams(
  object = seurat_campbell_refed
)
## Parameters used in latest FindClusters calculation run on: 2018-11-21 00:44:18
## =============================================================================
## Resolution: 0.6
## -----------------------------------------------------------------------------
## Modularity Function Algorithm n.start n.iter
## 1 1 100 10
## -----------------------------------------------------------------------------
## Reduction used k.param prune.SNN
## pca 30 0.0667
## -----------------------------------------------------------------------------
## Dims used in calculation
## =============================================================================
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## t-SNE embedding on PCs 1 to 13, labelled with the resolution 2.0 clusters
## NOTE(review): same dims.use as the earlier RunTSNE call - presumably this re-run
## only reproduces the same embedding; confirm before removing.
seurat_campbell_refed <- RunTSNE(
  object = seurat_campbell_refed,
  dims.use = 1:13,
  do.fast = TRUE
)
# TSNEPlot(object = seurat_campbell_refed)
TSNEPlot(
  object = seurat_campbell_refed,
  no.legend = TRUE,
  do.label = TRUE
)
## 1451 variable genes, 13 PCs, resolution 2.0 = 15 clusters
## Number of cells per cluster:
table(seurat_campbell_refed@ident)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## 340 255 205 204 203 183 181 111 100 81 77 76 73 49 36
## Final clustering: 1451 variable genes, PCs 1 to 13, resolution = 1.0
seurat_campbell_refed <- FindClusters(
  object = seurat_campbell_refed,
  reduction.type = "pca",
  dims.use = 1:13,
  resolution = 1.0,
  print.output = 0,
  save.SNN = TRUE,
  force.recalc = TRUE
)
## t-SNE for the final parameters:
seurat_campbell_refed <- RunTSNE(
  object = seurat_campbell_refed,
  dims.use = 1:13,
  do.fast = TRUE
)
Use 1451 variable genes and 13 principal components with a resolution of 1.0. This gives a total of 11 clusters.
## Markers for every cluster compared to all remaining cells; both positive and negative genes are reported.
seurat_campbell_refed_biomarkers <- FindAllMarkers(
  object = seurat_campbell_refed,
  only.pos = FALSE,
  min.pct = 0.2
)
## Top 10 biomarkers per cluster, ranked by avg_logFC (the result stays grouped by cluster)
top10_seurat_campbell_markers <- seurat_campbell_refed_biomarkers %>%
  group_by(cluster) %>%
  top_n(10, avg_logFC)
top10_seurat_campbell_markers
## # A tibble: 110 x 7
## # Groups: cluster [11]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 1.13e-283 2.92 0.995 0.152 2.17e-279 0 ENSMUSG00000005705
## 2 5.73e-242 3.36 0.992 0.282 1.10e-237 0 ENSMUSG00000029819
## 3 3.71e-211 1.88 0.657 0.042 7.14e-207 0 ENSMUSG00000026834
## 4 2.23e-187 2.04 0.71 0.084 4.29e-183 0 ENSMUSG00000021091
## 5 1.06e-132 1.44 0.819 0.231 2.04e-128 0 ENSMUSG00000051159
## 6 3.36e-108 1.19 0.455 0.049 6.47e-104 0 ENSMUSG00000021685
## 7 1.88e-100 1.17 0.803 0.284 3.62e- 96 0 ENSMUSG00000027523
## 8 6.34e- 92 1.10 0.434 0.058 1.22e- 87 0 ENSMUSG00000055737
## 9 3.46e- 86 1.17 0.622 0.169 6.67e- 82 0 ENSMUSG00000054667
## 10 4.68e- 81 1.23 0.726 0.272 9.02e- 77 0 ENSMUSG00000026360
## # ... with 100 more rows
## Write the full marker table and the top-10 table to CSV (unquoted fields)
write.csv(
  as.data.frame(seurat_campbell_refed_biomarkers),
  file = "seurat_campbell_refed_biomarkers.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers),
  file = "top10_seurat_campbell_refed_biomarkers.csv",
  quote = FALSE
)
## Differential expression with the ROC test. This can take a long time.
seurat_campbell_refed_biomarkers_ROC <- FindAllMarkers(
  object = seurat_campbell_refed,
  only.pos = FALSE,
  min.pct = 0.2,
  test.use = "roc"
)
## Top 10 ROC markers per cluster, ranked by avg_logFC (the result stays grouped by cluster)
top10_seurat_campbell_markers_ROC <- seurat_campbell_refed_biomarkers_ROC %>%
  group_by(cluster) %>%
  top_n(10, avg_logFC)
top10_seurat_campbell_markers_ROC
## # A tibble: 95 x 9
## # Groups: cluster [11]
## myAUC avg_diff power avg_logFC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0.983 3.36 0.966 3.36 0.992 0.282 NA 0 ENSMUSG00…
## 2 0.977 2.92 0.954 2.92 0.995 0.152 NA 0 ENSMUSG00…
## 3 0.836 1.44 0.672 1.44 0.819 0.231 NA 0 ENSMUSG00…
## 4 0.829 2.04 0.658 2.04 0.71 0.084 NA 0 ENSMUSG00…
## 5 0.814 1.88 0.628 1.88 0.657 0.042 NA 0 ENSMUSG00…
## 6 0.803 1.17 0.606 1.17 0.803 0.284 NA 0 ENSMUSG00…
## 7 0.766 1.23 0.532 1.23 0.726 0.272 NA 0 ENSMUSG00…
## 8 0.744 1.17 0.488 1.17 0.622 0.169 NA 0 ENSMUSG00…
## 9 0.722 1.10 0.444 1.10 0.62 0.231 NA 0 ENSMUSG00…
## 10 0.704 1.19 0.408 1.19 0.455 0.049 NA 0 ENSMUSG00…
## # ... with 85 more rows
## Write the full ROC marker table and its top-10 table to CSV (unquoted fields)
write.csv(
  as.data.frame(seurat_campbell_refed_biomarkers_ROC),
  file = "seurat_campbell_refed_biomarkers_ROC.csv",
  quote = FALSE
)
write.csv(
  as.data.frame(top10_seurat_campbell_markers_ROC),
  file = "top10_seurat_campbell_refed_biomarkers_ROC.csv",
  quote = FALSE
)
## Heat map of the top 10 DE genes per cluster
DoHeatmap(
  object = seurat_campbell_refed,
  genes.use = top10_seurat_campbell_markers$gene,
  slim.col.label = TRUE,
  remove.key = TRUE
)
## Save the final Seurat object as an .rds file
saveRDS(
  seurat_campbell_refed,
  file = "./seurat_campbell_refed_final.rds"
)